A Discrete-Event Network Simulator
API
hash-example.cc
Go to the documentation of this file.
1 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
2 /*
3  * This program is free software; you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License version 2 as
5  * published by the Free Software Foundation;
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software
14  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15  */
16 
17 #include <algorithm> // find
18 #include <climits> // CHAR_BIT
19 #include <fstream>
20 #include <iostream>
21 #include <iomanip>
22 #include <map>
23 #include <vector>
24 
25 #include "ns3/core-module.h"
26 #include "ns3/hash.h"
27 
105 namespace ns3 {
106 
107 NS_LOG_COMPONENT_DEFINE ("Hasher");
108 
109 namespace Hash {
110 
115 namespace Example {
116 
120 class Collider
121 {
122 
123 public:
124  std::string m_name;
128  enum Bits
129  {
132  };
133 
141  Collider (const std::string name, Hasher hash, const enum Bits bits)
142  : m_name (name),
143  m_hash (hash),
144  m_bits (bits)
145  { }
146 
153  bool Add (const std::string phrase)
154  {
155  uint64_t h = GetHash (phrase);
156 
157  // Check for collisions
158  if (m_dict.count (h) > 0)
159  {
160  // we've seen this hash before, why?
161  if (phrase == m_dict[h])
162  {
163  // duplicate phrase
164  return false;
165  }
166 
167  // new phrase generating a real collision
168  // alphabetize
169  if ( m_dict[h] < phrase)
170  {
171  m_coll.push_back (std::make_pair (h, phrase));
172  }
173  else
174  {
175  m_coll.push_back (std::make_pair (h, m_dict[h]));
176  m_dict[h] = phrase;
177  }
178  }
179  else
180  {
181  // Insert new hash
182  m_dict.insert (std::make_pair (h, phrase));
183  }
184  return true;
185  } // Add ()
186 
190  std::string GetName () const
191  {
192  std::string name = m_name;
193 
194  switch (m_bits)
195  {
196  /* *NS_CHECK_STYLE_OFF* */
197  case Bits32: name += " (32-bit version)"; break;
198  case Bits64: name += " (64-bit version)"; break;
199  default: name += " (unknown!?!)";
200  /* *NS_CHECK_STYLE_ON* */
201  }
202  return name;
203  }
204 
206  void Report () const
207  {
208  std::cout << std::endl;
209 
210  std::cout << GetName () << ": " << m_coll.size () << " collisions:"
211  << std::endl;
212  for (auto collision : m_coll)
213  {
214  uint64_t h = collision.first;
215 
216  std::cout << std::setfill ('0') << std::hex << std::setw (8) << h
217  << std::dec << std::setfill (' ') << " "
218  << std::setw (20) << std::left
219  << m_dict.find (h)->second
220  << collision.second
221  << std::right
222  << std::endl;
223  }
224  } // Report ()
225 
226 private:
227 
234  uint64_t GetHash (const std::string phrase)
235  {
236  m_hash.clear ();
237  uint64_t h = 0;
238 
239  if (m_bits == Bits32)
240  {
241  h = m_hash.GetHash32 (phrase);
242  }
243  else
244  {
245  h = m_hash.GetHash64 (phrase);
246  }
247  return h;
248  }
249 
251  enum Bits m_bits;
252 
254  typedef std::map <uint64_t, std::string> hashdict_t;
255 
258 
260  typedef std::vector < std::pair<uint64_t, std::string> > collision_t;
261 
264 
265 }; // class Collider
266 
267 
272 {
273 public:
276  : m_nphrases (0)
277  {
278  m_words.reserve (320000);
279  }
280 
286  void Add (Collider c)
287  {
288  m_hashes.push_back (c);
289  }
290 
296  void Add (const std::string phrase)
297  {
298  if (phrase.size () == 0)
299  {
300  return;
301  }
302 
303  int newPhrases = 0;
304  for (auto & collider : m_hashes)
305  {
306  newPhrases += collider.Add (phrase);
307  }
308 
309  if (newPhrases)
310  {
311  ++m_nphrases;
312  m_words.push_back (phrase);
313  }
314  } // Add ()
315 
354  {
355  // Expected number of collisions
356  //
357  // Number of buckets = k = 2^bits
358  long double k32 = 0xFFFFFFFF;
359  long double k64 = static_cast<long double> (0xFFFFFFFFFFFFFFFFULL);
360 
361  long double n = m_nphrases;
362  long double Ec32 = n * (n - 1) / ( 2 * k32) * (1 - (n - 2) / (3 * k32));
363  long double Ec64 = n * (n - 1) / ( 2 * k64) * (1 - (n - 2) / (3 * k64));
364 
365  // Output collisions
366  std::cout << "" << std::endl;
367  std::cout << "Number of words or phrases: " << n << std::endl;
368  std::cout << "Expected number of collisions: (32-bit table) " << Ec32
369  << std::endl;
370  std::cout << "Expected number of collisions: (64-bit table) " << Ec64
371  << std::endl;
372  } // ReportExpectedCollisions
373 
374 
376  void Report () const
377  {
379 
380  for (auto collider : m_hashes)
381  {
382  collider.Report ();
383  }
384  } // Report ()
385 
391  void TimeOne (const Collider & collider)
392  {
393  // Hashing speed
394  uint32_t reps = 100;
395  Hasher h = collider.m_hash;
396  int start = clock ();
397  for (auto const & word : m_words)
398  {
399  for (uint32_t i = 0; i < reps; ++i)
400  {
401  h.clear ().GetHash32 (word);
402  }
403  }
404  int stop = clock ();
405  double delta = stop - start;
406  double per = 1e9 * delta / (m_nphrases * reps * CLOCKS_PER_SEC);
407 
408  std::cout << std::left
409  << std::setw (32) << collider.GetName ()
410  << std::right
411  << std::setw (10) << m_nphrases
412  << std::setw (10) << reps
413  << std::setw (10) << stop - start
414  << std::setw (12) << per
415  << std::endl;
416 
417  } // TimeOne ()
418 
420  void Time ()
421  {
422  std::cout << "" << std::endl;
423  std::cout << std::left
424  << std::setw (32) << "Hash timing"
425  << std::right
426  << std::setw (10) << "Phrases"
427  << std::setw (10) << "Reps"
428  << std::setw (10) << "Ticks"
429  << std::setw (12) << "ns/hash"
430  << std::endl;
431 
432  for (auto const & collider : m_hashes)
433  {
434  TimeOne (collider);
435  }
436  } // Time ()
437 
438 private:
439  unsigned long m_nphrases;
440  std::vector <Collider> m_hashes;
441  std::vector <std::string> m_words;
443 }; // class Dictionary
444 
445 
450 {
451 
452 public:
453 
460  bool Add (const std::string file)
461  {
462  if (std::find (m_files.begin (), m_files.end (), file) == m_files.end ())
463  {
464  m_files.push_back (file);
465  }
466 
467  return true;
468  }
469 
471  static std::string GetDefault (void)
472  {
473  return "/usr/share/dict/words";
474  }
475 
481  void ReadInto (Dictionary & dict)
482  {
483  if (m_files.size () == 0)
484  {
485  Add (GetDefault ());
486  }
487 
488  std::cout << "Hashing the dictionar"
489  << (m_files.size () == 1 ? "y" : "ies")
490  << std::endl;
491 
492  for (auto dictFile : m_files)
493  {
494  std::cout << "Dictionary file: " << dictFile << std::endl;
495 
496  // Find collisions
497 
498  // Open the file
499  std::ifstream dictStream;
500  dictStream.open (dictFile.c_str () );
501  if (!dictStream.is_open () )
502  {
503  std::cerr << "Failed to open dictionary file."
504  << "'" << dictFile << "'"
505  << std::endl;
506  continue;
507  }
508 
509  while (dictStream.good () )
510  {
511  std::string phrase;
512  getline (dictStream, phrase);
513  dict.Add (phrase);
514  } // while
515 
516  dictStream.close ();
517 
518  } // for m_files
519 
520  } // ReadInto
521 
522 private:
523  std::vector <std::string> m_files;
525 }; // class DictFiles
526 
527 } // namespace Example
528 
529 } // namespace Hash
530 
531 } // namespace ns3
532 
533 
534 using namespace ns3;
535 using namespace ns3::Hash::Example;
536 
537 int
538 main (int argc, char *argv[])
539 {
540  std::cout << std::endl;
541  std::cout << "Hasher" << std::endl;
542 
543  bool timing = false;
544  DictFiles files;
545 
546  CommandLine cmd (__FILE__);
547  cmd.Usage ("Find hash collisions in the dictionary.");
548  cmd.AddValue ("dict", "Dictionary file to hash",
550  &files),
552 
553  cmd.AddValue ("time", "Run timing test", timing);
554  cmd.Parse (argc, argv);
555 
556  Dictionary dict;
557  dict.Add ( Collider ("FNV1a",
558  Hasher ( Create<Hash::Function::Fnv1a> () ),
560  dict.Add ( Collider ("FNV1a",
561  Hasher ( Create<Hash::Function::Fnv1a> () ),
563 
564  dict.Add ( Collider ("Murmur3",
565  Hasher ( Create<Hash::Function::Murmur3> () ),
567  dict.Add ( Collider ("Murmur3",
568  Hasher ( Create<Hash::Function::Murmur3> () ),
570 
571  files.ReadInto (dict);
572 
573  dict.Report ();
574 
575  if (timing)
576  {
577  dict.Time ();
578  } // if (timing)
579 
580 
581 } // main
uint32_t GetHash32(const char *buffer, const std::size_t size)
Compute 32-bit hash of a byte buffer.
Definition: hash.h:239
Use 32-bit hash function.
std::vector< std::pair< uint64_t, std::string > > collision_t
Collision map of subsequent instances.
void Add(const std::string phrase)
Add a string to the dictionary.
def start()
Definition: core.py:1855
#define NS_LOG_COMPONENT_DEFINE(name)
Define a Log component with a specific name.
Definition: log.h:205
void Time()
Report the execution time of each hash across the entire Dictionary.
cmd
Definition: second.py:35
bool Add(const std::string file)
CommandLine callback function to add a file argument to the list.
bool Add(const std::string phrase)
Add a string to the Collider.
unsigned long m_nphrases
Number of strings hashed.
Word list and hashers to test.
Collider(const std::string name, Hasher hash, const enum Bits bits)
Constructor.
void ReportExpectedCollisions() const
Report the expected number of collisions.
std::vector< std::string > m_words
List of unique words.
Parse command-line arguments.
Definition: command-line.h:227
void ReadInto(Dictionary &dict)
Add phrases from the files into the dict.
Keep track of collisions.
std::string m_name
Name of this hash.
Hasher & clear(void)
Restore initial state.
Definition: hash.cc:48
Every class exported by the ns3 library is enclosed in the ns3 namespace.
uint64_t GetHash64(const char *buffer, const std::size_t size)
Compute 64-bit hash of a byte buffer.
Definition: hash.h:247
Source word list files.
Use 64-bit hash function.
void Report() const
Print the collisions for each Collider.
std::vector< std::string > m_files
List of word files to use.
enum Bits m_bits
Hash function.
void Report() const
Print the collisions found.
void TimeOne(const Collider &collider)
Time and report the execution of one hash across the entire Dictionary.
std::vector< Collider > m_hashes
List of hash Colliders.
Namespace for hasher-example.
void Add(Collider c)
Add a Collider containing a hash function.
collision_t m_coll
The list of collisions.
hashdict_t m_dict
The dictionary map, indexed by hash.
static std::string GetDefault(void)
Callback< R, Ts... > MakeCallback(R(T::*memPtr)(Ts...), OBJ objPtr)
Build Callbacks for class method members which take varying numbers of arguments and potentially retu...
Definition: callback.h:1642
std::map< uint64_t, std::string > hashdict_t
Hashed dictionary of first instance of each hash.
Generic Hash function interface.
Definition: hash.h:87
Bits
The size of hash function being tested.
std::string GetName() const
uint64_t GetHash(const std::string phrase)
Get the appropriate hash value.