A Discrete-Event Network Simulator
API
hash-example.cc
Go to the documentation of this file.
1 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
2 /*
3  * This program is free software; you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License version 2 as
5  * published by the Free Software Foundation;
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software
14  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15  */
16 
17 #include <algorithm> // find
18 #include <climits> // CHAR_BIT
19 #include <fstream>
20 #include <iostream>
21 #include <iomanip>
22 #include <map>
23 #include <vector>
24 
25 #include "ns3/core-module.h"
26 #include "ns3/hash.h"
27 
103 namespace ns3
104 {
105 
106 NS_LOG_COMPONENT_DEFINE ("Hasher");
107 
108 namespace Hash
109 {
110 
115 namespace Example
116 {
117 
121 class Collider {
122 
123 public:
124  std::string m_name;
128  enum Bits {
131  };
132 
140  Collider (const std::string name, Hasher hash, const enum Bits bits)
141  : m_name (name),
142  m_hash (hash),
143  m_bits (bits)
144  { }
145 
152  bool Add (const std::string phrase)
153  {
154  uint64_t h = GetHash (phrase);
155 
156  // Check for collisions
157  if (m_dict.count (h) > 0)
158  {
159  // we've seen this hash before, why?
160  if (phrase == m_dict[h])
161  {
162  // duplicate phrase
163  return false;
164  }
165 
166  // new phrase generating a real collision
167  // alphabetize
168  if ( m_dict[h] < phrase)
169  {
170  m_coll.push_back (std::make_pair (h, phrase));
171  }
172  else
173  {
174  m_coll.push_back (std::make_pair (h, m_dict[h]));
175  m_dict[h] = phrase;
176  }
177  }
178  else
179  {
180  // Insert new hash
181  m_dict.insert (std::make_pair (h, phrase));
182  }
183  return true;
184  } // Add ()
185 
189  std::string GetName () const
190  {
191  std::string name = m_name;
192 
193  switch (m_bits)
194  {
195  case Bits32: name += " (32-bit version)"; break;
196  case Bits64: name += " (64-bit version)"; break;
197  default: name += " (unknown!?!)";
198  }
199  return name;
200  }
201 
203  void Report () const
204  {
205  std::cout << std::endl;
206 
207  std::cout << GetName () << ": " << m_coll.size () << " collisions:"
208  << std::endl;
209  for (collision_t::const_iterator it = m_coll.begin ();
210  it != m_coll.end ();
211  ++it)
212  {
213  uint64_t h = it->first;
214 
215  std::cout << std::setfill ('0') << std::hex << std::setw(8) << h
216  << std::dec << std::setfill(' ') << " "
217  << std::setw(20) << std::left
218  << m_dict.find (h)->second
219  << it->second
220  << std::right
221  << std::endl;
222  }
223  } // Report ()
224 
225 
226 private:
227 
234  uint64_t GetHash (const std::string phrase)
235  {
236  m_hash.clear ();
237  uint64_t h = 0;
238 
239  if (m_bits == Bits32)
240  {
241  h = m_hash.GetHash32 (phrase);
242  }
243  else
244  {
245  h = m_hash.GetHash64 (phrase);
246  }
247  return h;
248  }
249 
251  enum Bits m_bits;
252 
254  typedef std::map <uint64_t, std::string> hashdict_t;
255 
257  hashdict_t m_dict;
258 
260  typedef std::vector < std::pair<uint64_t, std::string> > collision_t;
261 
263  collision_t m_coll;
264 
265 }; // class Collider
266 
267 
271 class Dictionary {
272 public:
275  : m_nphrases (0)
276  {
277  m_words.reserve (320000);
278  }
279 
285  void Add (Collider c)
286  {
287  m_hashes.push_back (c);
288  }
289 
295  void Add (const std::string phrase)
296  {
297  if (phrase.size () == 0)
298  {
299  return ;
300  }
301 
302  int newPhrases = 0;
303  for (std::vector <Collider>::iterator it = m_hashes.begin ();
304  it != m_hashes.end ();
305  ++it)
306  {
307  newPhrases += it->Add (phrase);
308  }
309 
310  if (newPhrases)
311  {
312  ++m_nphrases;
313  m_words.push_back (phrase);
314  }
315  } // Add ()
316 
355  {
356  // Expected number of collisions
357  //
358  // Number of buckets = k = 2^bits
359  long double k32 = 0xFFFFFFFF;
360  long double k64 = 0xFFFFFFFFFFFFFFFFULL;
361 
362  long double n = m_nphrases;
363  long double Ec32 = n * (n - 1) / ( 2 * k32) * (1 - (n - 2)/(3 * k32));
364  long double Ec64 = n * (n - 1) / ( 2 * k64) * (1 - (n - 2)/(3 * k64));
365 
366  // Output collisions
367  std::cout << "" << std::endl;
368  std::cout << "Number of words or phrases: " << n << std::endl;
369  std::cout << "Expected number of collisions: (32-bit table) " << Ec32
370  << std::endl;
371  std::cout << "Expected number of collisions: (64-bit table) " << Ec64
372  << std::endl;
373  } // ReportExpectedCollisions
374 
375 
377  void Report () const
378  {
380 
381  for (std::vector <Collider>::const_iterator it = m_hashes.begin ();
382  it != m_hashes.end ();
383  ++it)
384  {
385  it->Report ();
386  }
387  } // Report ()
388 
394  void TimeOne (const int hindex)
395  {
396  // Hashing speed
397  uint32_t reps = 100;
398  Hasher h = m_hashes[hindex].m_hash;
399  int start = clock ();
400  for (std::vector<std::string>::const_iterator w = m_words.begin ();
401  w != m_words.end();
402  ++w)
403  {
404  for (uint32_t i = 0; i < reps; ++i)
405  {
406  h.clear ().GetHash32 (*w);
407  }
408  }
409  int stop = clock ();
410  double delta = stop - start;
411  double per = 1e9 * delta / (m_nphrases * reps * CLOCKS_PER_SEC);
412 
413  std::cout << std::left
414  << std::setw (32) << m_hashes[hindex].GetName ()
415  << std::right
416  << std::setw (10) << m_nphrases
417  << std::setw (10) << reps
418  << std::setw (10) << stop - start
419  << std::setw (12) << per
420  << std::endl;
421 
422  } // TimeOne ()
423 
425  void Time ()
426  {
427  std::cout << "" << std::endl;
428  std::cout << std::left
429  << std::setw (32) << "Hash timing"
430  << std::right
431  << std::setw (10) << "Phrases"
432  << std::setw (10) << "Reps"
433  << std::setw (10) << "Ticks"
434  << std::setw (12) << "ns/hash"
435  << std::endl;
436 
437  for (unsigned int i = 0; i < m_hashes.size (); ++i)
438  {
439  TimeOne (i);
440  }
441  } // Time ()
442 
443 private:
444  unsigned long m_nphrases;
445  std::vector <Collider> m_hashes;
446  std::vector <std::string> m_words;
448 }; // class Dictionary
449 
450 
455 {
456 
457 public:
458 
465  bool Add (const std::string file)
466  {
467  if (std::find (m_files.begin (), m_files.end (), file) == m_files.end ())
468  {
469  m_files.push_back (file);
470  }
471 
472  return true;
473  }
474 
480  void ReadInto (Dictionary & dict)
481  {
482  if (m_files.size () == 0)
483  {
484  Add ("/usr/share/dict/web2");
485  }
486 
487  std::cout << "Hashing the dictionar"
488  << (m_files.size () == 1 ? "y" : "ies")
489  << std::endl;
490 
491  for (std::vector <std::string>::const_iterator it = m_files.begin ();
492  it != m_files.end ();
493  ++it)
494  {
495  std::string dictFile = *it;
496  std::cout << "Dictionary file: " << dictFile << std::endl;
497 
498  // Find collisions
499 
500  // Open the file
501  std::ifstream dictStream;
502  dictStream.open (dictFile.c_str () );
503  if (! dictStream.is_open () )
504  {
505  std::cerr << "Failed to open dictionary file."
506  << "'" << dictFile << "'"
507  << std::endl;
508  continue;
509  }
510 
511  while (dictStream.good () )
512  {
513  std::string phrase;
514  getline (dictStream, phrase);
515  dict.Add (phrase);
516  } // while
517 
518  dictStream.close ();
519 
520  } // for m_files
521 
522  } // ReadInto
523 
524 private:
525  std::vector <std::string> m_files;
527 }; // class DictFiles
528 
529 } // namespace Example
530 
531 } // namespace Hash
532 
533 } // namespace ns3
534 
535 
536 using namespace ns3;
537 using namespace ns3::Hash::Example;
538 
539 int
540 main (int argc, char *argv[])
541 {
542  std::cout << std::endl;
543  std::cout << "Hasher" << std::endl;
544 
545  bool timing = false;
546  DictFiles files;
547 
549  cmd.Usage ("Find hash collisions in the dictionary.");
550  cmd.AddValue ("dict", "Dictionary file to hash",
552  &files));
553  cmd.AddValue ("time", "Run timing test", timing);
554  cmd.Parse (argc, argv);
555 
556  Dictionary dict;
557  dict.Add ( Collider ("FNV1a",
558  Hasher ( Create<Hash::Function::Fnv1a> () ),
560  dict.Add ( Collider ("FNV1a",
561  Hasher ( Create<Hash::Function::Fnv1a> () ),
563 
564  dict.Add ( Collider ("Murmur3",
565  Hasher ( Create<Hash::Function::Murmur3> () ),
567  dict.Add ( Collider ("Murmur3",
568  Hasher ( Create<Hash::Function::Murmur3> () ),
570 
571  files.ReadInto (dict);
572 
573  dict.Report ();
574 
575  if (timing)
576  {
577  dict.Time ();
578  } // if (timing)
579 
580 
581 } // main
Use 32-bit hash function.
std::vector< std::pair< uint64_t, std::string > > collision_t
Collision map of subsequent instances.
void Add(const std::string phrase)
Add a string to the dictionary.
def start()
Definition: core.py:1482
void TimeOne(const int hindex)
Time and report the execution of one hash across the entire Dictionary.
std::string GetName() const
#define NS_LOG_COMPONENT_DEFINE(name)
Define a Log component with a specific name.
Definition: log.h:201
void Report() const
Print the collisions found.
void Time()
Report the execution time of each hash across the entire Dictionary.
tuple cmd
Definition: second.py:35
void ReportExpectedCollisions() const
Report the expected number of collisions.
bool Add(const std::string file)
CommandLine callback function to add a file argument to the list.
bool Add(const std::string phrase)
Add a string to the Collider.
unsigned long m_nphrases
Number of strings hashed.
void Usage(const std::string usage)
Supply the program usage and documentation.
Definition: command-line.cc:96
Word list and hashers to test.
Collider(const std::string name, Hasher hash, const enum Bits bits)
Constructor.
uint32_t GetHash32(const char *buffer, const size_t size)
Compute 32-bit hash of a byte buffer.
Definition: hash.h:239
Callback< R > MakeCallback(R(T::*memPtr)(void), OBJ objPtr)
Definition: callback.h:1489
std::vector< std::string > m_words
List of unique words.
Use 64-bit hash function.
Parse command-line arguments.
Definition: command-line.h:205
void ReadInto(Dictionary &dict)
Add phrases from the files into the dict.
Keep track of collisions.
std::string m_name
Name of this hash.
Bits
The size of hash function being tested.
Hasher & clear(void)
Restore initial state.
Definition: hash.cc:48
Every class exported by the ns3 library is enclosed in the ns3 namespace.
Source word list files.
std::vector< std::string > m_files
List of word files to use.
void Report() const
Print the collisions for each Collider.
enum Bits m_bits
Hash function.
std::vector< Collider > m_hashes
List of hash Colliders.
Namespace for hasher-example.
void Add(Collider c)
Add a Collider containing a hash function.
void AddValue(const std::string &name, const std::string &help, T &value)
Add a program argument, assigning to POD.
Definition: command-line.h:495
collision_t m_coll
The list of collisions.
void Parse(int argc, char *argv[])
Parse the program arguments.
hashdict_t m_dict
The dictionary map, indexed by hash.
uint64_t GetHash64(const char *buffer, const size_t size)
Compute 64-bit hash of a byte buffer.
Definition: hash.h:247
std::map< uint64_t, std::string > hashdict_t
Hashed dictionary of first instance of each hash.
Generic Hash function interface.
Definition: hash.h:87
uint64_t GetHash(const std::string phrase)
Get the appropriate hash value.