A Discrete-Event Network Simulator
API
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
hash-example.cc
Go to the documentation of this file.
1 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
2 /*
3  * This program is free software; you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License version 2 as
5  * published by the Free Software Foundation;
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software
14  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15  */
16 
17 #include <algorithm> // find
18 #include <climits> // CHAR_BIT
19 #include <fstream>
20 #include <iostream>
21 #include <iomanip>
22 #include <map>
23 #include <vector>
24 
25 #include "ns3/core-module.h"
26 #include "ns3/hash.h"
27 
28 NS_LOG_COMPONENT_DEFINE ("Hasher");
29 
30 namespace ns3
31 {
32 namespace Hash
33 {
34 
39 namespace Example
40 {
41 
45 class Collider {
46 
47 public:
48  std::string m_name;
51  enum Bits {
54  };
55 
63  Collider (const std::string name, Hasher hash, const enum Bits bits)
64  : m_name (name),
65  m_hash (hash),
66  m_bits (bits)
67  { }
68 
75  bool Add (const std::string phrase)
76  {
77  uint64_t h = GetHash (phrase);
78 
79  // Check for collisions
80  if (m_dict.count (h) > 0)
81  {
82  // we've seen this hash before, why?
83  if (phrase == m_dict[h])
84  {
85  // duplicate phrase
86  return false;
87  }
88 
89  // new phrase generating a real collision
90  // alphabetize
91  if ( m_dict[h] < phrase)
92  {
93  m_coll.push_back (std::make_pair (h, phrase));
94  }
95  else
96  {
97  m_coll.push_back (std::make_pair (h, m_dict[h]));
98  m_dict[h] = phrase;
99  }
100  }
101  else
102  {
103  // Insert new hash
104  m_dict.insert (std::make_pair (h, phrase));
105  }
106  return true;
107  } // Add ()
108 
112  std::string GetName () const
113  {
114  std::string name = m_name;
115 
116  switch (m_bits)
117  {
118  case Bits32: name += " (32-bit version)"; break;
119  case Bits64: name += " (64-bit version)"; break;
120  default: name += " (unknown!?!)";
121  }
122  return name;
123  }
124 
126  void Report () const
127  {
128  std::cout << std::endl;
129 
130  std::cout << GetName () << ": " << m_coll.size () << " collisions:"
131  << std::endl;
132  for (collision_t::const_iterator it = m_coll.begin ();
133  it != m_coll.end ();
134  ++it)
135  {
136  uint64_t h = it->first;
137 
138  std::cout << std::setfill ('0') << std::hex << std::setw(8) << h
139  << std::dec << std::setfill(' ') << " "
140  << std::setw(20) << std::left
141  << m_dict.find (h)->second
142  << it->second
143  << std::right
144  << std::endl;
145  }
146  } // Report ()
147 
148 
149 private:
150 
157  uint64_t GetHash (const std::string phrase)
158  {
159  m_hash.clear ();
160  uint64_t h = 0;
161 
162  if (m_bits == Bits32)
163  {
164  h = m_hash.GetHash32 (phrase);
165  }
166  else
167  {
168  h = m_hash.GetHash64 (phrase);
169  }
170  return h;
171  }
172 
174  enum Bits m_bits;
175 
177  typedef std::map <uint64_t, std::string> hashdict_t;
178 
181 
183  typedef std::vector < std::pair<uint64_t, std::string> > collision_t;
184 
187 
188 }; // class Collider
189 
190 
194 class Dictionary {
195 public:
198  : m_nphrases (0)
199  {
200  m_words.reserve (320000);
201  }
202 
204  void Add (Collider c)
205  {
206  m_hashes.push_back (c);
207  }
208 
214  void Add (const std::string phrase)
215  {
216  if (phrase.size () == 0)
217  {
218  return ;
219  }
220 
221  int newPhrases = 0;
222  for (std::vector <Collider>::iterator it = m_hashes.begin ();
223  it != m_hashes.end ();
224  ++it)
225  {
226  newPhrases += it->Add (phrase);
227  }
228 
229  if (newPhrases)
230  {
231  ++m_nphrases;
232  m_words.push_back (phrase);
233  }
234  } // Add ()
235 
274  {
275  // Expected number of collisions
276  //
277  // Number of buckets = k = 2^bits
278  long double k32 = 0xFFFFFFFF;
279  long double k64 = 0xFFFFFFFFFFFFFFFFULL;
280 
281  long double n = m_nphrases;
282  long double Ec32 = n * (n - 1) / ( 2 * k32) * (1 - (n - 2)/(3 * k32));
283  long double Ec64 = n * (n - 1) / ( 2 * k64) * (1 - (n - 2)/(3 * k64));
284 
285  // Output collisions
286  std::cout << "" << std::endl;
287  std::cout << "Number of words or phrases: " << n << std::endl;
288  std::cout << "Expected number of collisions: (32-bit table) " << Ec32
289  << std::endl;
290  std::cout << "Expected number of collisions: (64-bit table) " << Ec64
291  << std::endl;
292  } // ReportExpectedCollisions
293 
294 
296  void Report () const
297  {
299 
300  for (std::vector <Collider>::const_iterator it = m_hashes.begin ();
301  it != m_hashes.end ();
302  ++it)
303  {
304  it->Report ();
305  }
306  } // Report ()
307 
313  void TimeOne (const int hindex)
314  {
315  // Hashing speed
316  uint32_t reps = 100;
317  Hasher h = m_hashes[hindex].m_hash;
318  int start = clock ();
319  for (std::vector<std::string>::const_iterator w = m_words.begin ();
320  w != m_words.end();
321  ++w)
322  {
323  for (uint32_t i = 0; i < reps; ++i)
324  {
325  h.clear ().GetHash32 (*w);
326  }
327  }
328  int stop = clock ();
329  double delta = stop - start;
330  double per = 1e9 * delta / (m_nphrases * reps * CLOCKS_PER_SEC);
331 
332  std::cout << std::left
333  << std::setw (32) << m_hashes[hindex].GetName ()
334  << std::right
335  << std::setw (10) << m_nphrases
336  << std::setw (10) << reps
337  << std::setw (10) << stop - start
338  << std::setw (12) << per
339  << std::endl;
340 
341  } // TimeOne ()
342 
344  void Time ()
345  {
346  std::cout << "" << std::endl;
347  std::cout << std::left
348  << std::setw (32) << "Hash timing"
349  << std::right
350  << std::setw (10) << "Phrases"
351  << std::setw (10) << "Reps"
352  << std::setw (10) << "Ticks"
353  << std::setw (12) << "ns/hash"
354  << std::endl;
355 
356  for (unsigned int i = 0; i < m_hashes.size (); ++i)
357  {
358  TimeOne (i);
359  }
360  } // Time ()
361 
362 private:
363  unsigned long m_nphrases;
364  std::vector <Collider> m_hashes;
365  std::vector <std::string> m_words;
367 }; // class Dictionary
368 
369 
374 {
375 
376 public:
377 
384  bool Add (const std::string file)
385  {
386  if (std::find (m_files.begin (), m_files.end (), file) == m_files.end ())
387  {
388  m_files.push_back (file);
389  }
390 
391  return true;
392  }
393 
399  void ReadInto (Dictionary & dict)
400  {
401  if (m_files.size () == 0)
402  {
403  Add ("/usr/share/dict/web2");
404  }
405 
406  std::cout << "Hashing the dictionar"
407  << (m_files.size () == 1 ? "y" : "ies")
408  << std::endl;
409 
410  for (std::vector <std::string>::const_iterator it = m_files.begin ();
411  it != m_files.end ();
412  ++it)
413  {
414  std::string dictFile = *it;
415  std::cout << "Dictionary file: " << dictFile << std::endl;
416 
417  // Find collisions
418 
419  // Open the file
420  std::ifstream dictStream;
421  dictStream.open (dictFile.c_str () );
422  if (! dictStream.is_open () )
423  {
424  std::cerr << "Failed to open dictionary file."
425  << "'" << dictFile << "'"
426  << std::endl;
427  continue;
428  }
429 
430  while (dictStream.good () )
431  {
432  std::string phrase;
433  getline (dictStream, phrase);
434  dict.Add (phrase);
435  } // while
436 
437  dictStream.close ();
438 
439  } // for m_files
440 
441  } // ReadInto
442 
443 private:
444  std::vector <std::string> m_files;
446 }; // class DictFiles
447 
448 } // namespace Example
449 
450 } // namespace Hash
451 
452 } // namespace ns3
453 
454 
455 using namespace ns3;
456 using namespace ns3::Hash::Example;
457 
458 int
459 main (int argc, char *argv[])
460 {
461  std::cout << std::endl;
462  std::cout << "Hasher" << std::endl;
463 
464  bool timing = false;
465  DictFiles files;
466 
467  CommandLine cmd;
468  cmd.Usage ("Find hash collisions in the dictionary.");
469  cmd.AddValue ("dict", "Dictionary file to hash",
471  &files));
472  cmd.AddValue ("time", "Run timing test", timing);
473  cmd.Parse (argc, argv);
474 
475  Dictionary dict;
476  dict.Add ( Collider ("FNV1a",
477  Hasher ( Create<Hash::Function::Fnv1a> () ),
479  dict.Add ( Collider ("FNV1a",
480  Hasher ( Create<Hash::Function::Fnv1a> () ),
482 
483  dict.Add ( Collider ("Murmur3",
484  Hasher ( Create<Hash::Function::Murmur3> () ),
486  dict.Add ( Collider ("Murmur3",
487  Hasher ( Create<Hash::Function::Murmur3> () ),
489 
490  files.ReadInto (dict);
491 
492  dict.Report ();
493 
494  if (timing)
495  {
496  dict.Time ();
497  } // if (timing)
498 
499 
500 } // main
501 
502 
503 /* Example Output:
504 
505 ./waf --run="hasher-example --time \
506  --dict=/usr/share/dict/web2 \
507  --dict=/usr/share/dict/web2a \
508  --dict=/usr/share/dict/propernames \
509  --dict=/usr/share/dict/connectives"
510 
511 Waf: Entering directory `build'
512 Waf: Leaving directory `build'
513 'build' finished successfully (3.028s)
514 
515 Hasher
516 Hashing the dictionaries
517 Dictionary file: /usr/share/dict/web2
518 Dictionary file: /usr/share/dict/web2a
519 Dictionary file: /usr/share/dict/propernames
520 Dictionary file: /usr/share/dict/connectives
521 
522 Number of words or phrases: 312094
523 Expected number of collisions: (32-bit table) 11.3389
524 Expected number of collisions: (64-bit table) 2.6401e-09
525 
526 FNV1a (32-bit version): 13 collisions:
527 a75b0ae7 elephantlike interventralia
528 091c4808 diversionary propenseness
529 172be6ba bairnishness sora
530 e6cb5099 purifier spongoblastic
531 4a841078 ameliorable unsmotherable
532 6ed21de2 brand-newness peripherial
533 22acb19b Petrarchism dewy-pinioned
534 5723634a grain gold hyphenation
535 f58026c1 seven-channeled turritella
536 946fc6ec multiradiate sister block
537 88625851 brachtmema ule tree
538 dc28b5ea Un-lutheran gutturotetany
539 9255bf44 re-sorter working stress
540 
541 FNV1a (64-bit version): 0 collisions:
542 
543 Murmur3 (32-bit version): 11 collisions:
544 5ea83eee impalace metahewettite
545 e06fbdde constancy oligosynthetic
546 2a713795 hypermonosyllable presatisfaction
547 c8bf0ef9 Hadromerina starky
548 d9c04b3d Accipiter syllable
549 c0da8f81 seriation trigonon
550 17612b26 daemon unerring
551 c2349ad7 air spring iron
552 1d91386f nine-pounder semicrescentic
553 fe17b1a5 cone speaker oblong-wedgeshaped
554 faa12798 saw bearing wilting point
555 
556 Murmur3 (64-bit version): 0 collisions:
557 
558 Hash timing Phrases Reps Ticks ns/hash
559 FNV1a (32-bit version) 312094 100 3140531 100.628
560 FNV1a (64-bit version) 312094 100 3145240 100.779
561 Murmur3 (32-bit version) 312094 100 4152139 133.041
562 Murmur3 (64-bit version) 312094 100 4191464 134.301
563 
564 */
Use 32-bit hash function.
Definition: hash-example.cc:52
std::vector< std::pair< uint64_t, std::string > > collision_t
Collision map of subsequent instances.
void Add(const std::string phrase)
Add a string to the dictionary.
void TimeOne(const int hindex)
Time and report the execution of one hash across the entire Dictionary.
std::string GetName() const
#define NS_LOG_COMPONENT_DEFINE(name)
Define a Log component with a specific name.
Definition: log.h:170
void Report() const
Print the collisions found.
void Time()
Report the execution time of each hash across the entire Dictionary.
void ReportExpectedCollisions() const
Report the expected number of collisions.
bool Add(const std::string file)
CommandLine callback function to add a file argument to the list.
bool Add(const std::string phrase)
Add a string to the Collider.
Definition: hash-example.cc:75
unsigned long m_nphrases
Number of strings hashed.
void Usage(const std::string usage)
Supply the program usage and documentation.
Definition: command-line.cc:87
Word list and hashers to test.
Collider(const std::string name, Hasher hash, const enum Bits bits)
Constructor.
Definition: hash-example.cc:63
uint32_t GetHash32(const char *buffer, const size_t size)
Compute 32-bit hash of a byte buffer.
Definition: hash.h:221
Callback< R > MakeCallback(R(T::*memPtr)(void), OBJ objPtr)
Definition: callback.h:1242
std::vector< std::string > m_words
List of unique words.
Parse command-line arguments.
Definition: command-line.h:177
void ReadInto(Dictionary &dict)
Add phrases from the files into the dict.
Keep track of collisions.
Definition: hash-example.cc:45
std::string m_name
Name of this hash.
Definition: hash-example.cc:48
Hasher & clear(void)
Restore initial state.
Definition: hash.cc:42
Source word list files.
Use 64-bit hash function.
Definition: hash-example.cc:53
std::vector< std::string > m_files
List of word files to use.
void Report() const
Print the collisions for each Collider.
enum Bits m_bits
Hash function.
std::vector< Collider > m_hashes
List of hash Colliders.
void Add(Collider c)
Add a Collider containing a hash function.
void AddValue(const std::string &name, const std::string &help, T &value)
Add a program argument, assigning to POD.
Definition: command-line.h:435
collision_t m_coll
The list of collisions.
void Parse(int argc, char *argv[])
Parse the program arguments.
hashdict_t m_dict
The dictionary map, indexed by hash.
uint64_t GetHash64(const char *buffer, const size_t size)
Compute 64-bit hash of a byte buffer.
Definition: hash.h:229
int main(int argc, char *argv[])
std::map< uint64_t, std::string > hashdict_t
Hashed dictionary of first instance of each hash.
Generic Hash function interface.
Definition: hash.h:78
uint64_t GetHash(const std::string phrase)
Get the appropriate hash value.