A Discrete-Event Network Simulator
API
hash-example.cc
Go to the documentation of this file.
1 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
2 /*
3  * This program is free software; you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License version 2 as
5  * published by the Free Software Foundation;
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software
14  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15  */
16 
17 #include <algorithm> // find
18 #include <climits> // CHAR_BIT
19 #include <fstream>
20 #include <iostream>
21 #include <iomanip>
22 #include <map>
23 #include <vector>
24 
25 #include "ns3/core-module.h"
26 #include "ns3/hash.h"
27 
103 namespace ns3
104 {
105 
106 NS_LOG_COMPONENT_DEFINE ("Hasher");
107 
108 namespace Hash
109 {
110 
115 namespace Example
116 {
117 
121 class Collider {
122 
123 public:
124  std::string m_name;
128  enum Bits {
131  };
132 
140  Collider (const std::string name, Hasher hash, const enum Bits bits)
141  : m_name (name),
142  m_hash (hash),
143  m_bits (bits)
144  { }
145 
152  bool Add (const std::string phrase)
153  {
154  uint64_t h = GetHash (phrase);
155 
156  // Check for collisions
157  if (m_dict.count (h) > 0)
158  {
159  // we've seen this hash before, why?
160  if (phrase == m_dict[h])
161  {
162  // duplicate phrase
163  return false;
164  }
165 
166  // new phrase generating a real collision
167  // alphabetize
168  if ( m_dict[h] < phrase)
169  {
170  m_coll.push_back (std::make_pair (h, phrase));
171  }
172  else
173  {
174  m_coll.push_back (std::make_pair (h, m_dict[h]));
175  m_dict[h] = phrase;
176  }
177  }
178  else
179  {
180  // Insert new hash
181  m_dict.insert (std::make_pair (h, phrase));
182  }
183  return true;
184  } // Add ()
185 
189  std::string GetName () const
190  {
191  std::string name = m_name;
192 
193  switch (m_bits)
194  {
195  case Bits32: name += " (32-bit version)"; break;
196  case Bits64: name += " (64-bit version)"; break;
197  default: name += " (unknown!?!)";
198  }
199  return name;
200  }
201 
203  void Report () const
204  {
205  std::cout << std::endl;
206 
207  std::cout << GetName () << ": " << m_coll.size () << " collisions:"
208  << std::endl;
209  for (collision_t::const_iterator it = m_coll.begin ();
210  it != m_coll.end ();
211  ++it)
212  {
213  uint64_t h = it->first;
214 
215  std::cout << std::setfill ('0') << std::hex << std::setw(8) << h
216  << std::dec << std::setfill(' ') << " "
217  << std::setw(20) << std::left
218  << m_dict.find (h)->second
219  << it->second
220  << std::right
221  << std::endl;
222  }
223  } // Report ()
224 
225 
226 private:
227 
234  uint64_t GetHash (const std::string phrase)
235  {
236  m_hash.clear ();
237  uint64_t h = 0;
238 
239  if (m_bits == Bits32)
240  {
241  h = m_hash.GetHash32 (phrase);
242  }
243  else
244  {
245  h = m_hash.GetHash64 (phrase);
246  }
247  return h;
248  }
249 
251  enum Bits m_bits;
252 
254  typedef std::map <uint64_t, std::string> hashdict_t;
255 
257  hashdict_t m_dict;
258 
260  typedef std::vector < std::pair<uint64_t, std::string> > collision_t;
261 
263  collision_t m_coll;
264 
265 }; // class Collider
266 
267 
271 class Dictionary {
272 public:
275  : m_nphrases (0)
276  {
277  m_words.reserve (320000);
278  }
279 
285  void Add (Collider c)
286  {
287  m_hashes.push_back (c);
288  }
289 
295  void Add (const std::string phrase)
296  {
297  if (phrase.size () == 0)
298  {
299  return ;
300  }
301 
302  int newPhrases = 0;
303  for (std::vector <Collider>::iterator it = m_hashes.begin ();
304  it != m_hashes.end ();
305  ++it)
306  {
307  newPhrases += it->Add (phrase);
308  }
309 
310  if (newPhrases)
311  {
312  ++m_nphrases;
313  m_words.push_back (phrase);
314  }
315  } // Add ()
316 
355  {
356  // Expected number of collisions
357  //
358  // Number of buckets = k = 2^bits
359  long double k32 = 0xFFFFFFFF;
360  long double k64 = 0xFFFFFFFFFFFFFFFFULL;
361 
362  long double n = m_nphrases;
363  long double Ec32 = n * (n - 1) / ( 2 * k32) * (1 - (n - 2)/(3 * k32));
364  long double Ec64 = n * (n - 1) / ( 2 * k64) * (1 - (n - 2)/(3 * k64));
365 
366  // Output collisions
367  std::cout << "" << std::endl;
368  std::cout << "Number of words or phrases: " << n << std::endl;
369  std::cout << "Expected number of collisions: (32-bit table) " << Ec32
370  << std::endl;
371  std::cout << "Expected number of collisions: (64-bit table) " << Ec64
372  << std::endl;
373  } // ReportExpectedCollisions
374 
375 
377  void Report () const
378  {
380 
381  for (std::vector <Collider>::const_iterator it = m_hashes.begin ();
382  it != m_hashes.end ();
383  ++it)
384  {
385  it->Report ();
386  }
387  } // Report ()
388 
394  void TimeOne (const int hindex)
395  {
396  // Hashing speed
397  uint32_t reps = 100;
398  Hasher h = m_hashes[hindex].m_hash;
399  int start = clock ();
400  for (std::vector<std::string>::const_iterator w = m_words.begin ();
401  w != m_words.end();
402  ++w)
403  {
404  for (uint32_t i = 0; i < reps; ++i)
405  {
406  h.clear ().GetHash32 (*w);
407  }
408  }
409  int stop = clock ();
410  double delta = stop - start;
411  double per = 1e9 * delta / (m_nphrases * reps * CLOCKS_PER_SEC);
412 
413  std::cout << std::left
414  << std::setw (32) << m_hashes[hindex].GetName ()
415  << std::right
416  << std::setw (10) << m_nphrases
417  << std::setw (10) << reps
418  << std::setw (10) << stop - start
419  << std::setw (12) << per
420  << std::endl;
421 
422  } // TimeOne ()
423 
425  void Time ()
426  {
427  std::cout << "" << std::endl;
428  std::cout << std::left
429  << std::setw (32) << "Hash timing"
430  << std::right
431  << std::setw (10) << "Phrases"
432  << std::setw (10) << "Reps"
433  << std::setw (10) << "Ticks"
434  << std::setw (12) << "ns/hash"
435  << std::endl;
436 
437  for (unsigned int i = 0; i < m_hashes.size (); ++i)
438  {
439  TimeOne (i);
440  }
441  } // Time ()
442 
443 private:
444  unsigned long m_nphrases;
445  std::vector <Collider> m_hashes;
446  std::vector <std::string> m_words;
448 }; // class Dictionary
449 
450 
455 {
456 
457 public:
458 
465  bool Add (const std::string file)
466  {
467  if (std::find (m_files.begin (), m_files.end (), file) == m_files.end ())
468  {
469  m_files.push_back (file);
470  }
471 
472  return true;
473  }
474 
480  void ReadInto (Dictionary & dict)
481  {
482  if (m_files.size () == 0)
483  {
484  Add ("/usr/share/dict/web2");
485  }
486 
487  std::cout << "Hashing the dictionar"
488  << (m_files.size () == 1 ? "y" : "ies")
489  << std::endl;
490 
491  for (std::vector <std::string>::const_iterator it = m_files.begin ();
492  it != m_files.end ();
493  ++it)
494  {
495  std::string dictFile = *it;
496  std::cout << "Dictionary file: " << dictFile << std::endl;
497 
498  // Find collisions
499 
500  // Open the file
501  std::ifstream dictStream;
502  dictStream.open (dictFile.c_str () );
503  if (! dictStream.is_open () )
504  {
505  std::cerr << "Failed to open dictionary file."
506  << "'" << dictFile << "'"
507  << std::endl;
508  continue;
509  }
510 
511  while (dictStream.good () )
512  {
513  std::string phrase;
514  getline (dictStream, phrase);
515  dict.Add (phrase);
516  } // while
517 
518  dictStream.close ();
519 
520  } // for m_files
521 
522  } // ReadInto
523 
524 private:
525  std::vector <std::string> m_files;
527 }; // class DictFiles
528 
529 } // namespace Example
530 
531 } // namespace Hash
532 
533 } // namespace ns3
534 
535 
536 using namespace ns3;
537 using namespace ns3::Hash::Example;
538 
539 int
540 main (int argc, char *argv[])
541 {
542  std::cout << std::endl;
543  std::cout << "Hasher" << std::endl;
544 
545  bool timing = false;
546  DictFiles files;
547 
548  CommandLine cmd;
549  cmd.Usage ("Find hash collisions in the dictionary.");
550  cmd.AddValue ("dict", "Dictionary file to hash",
552  &files));
553  cmd.AddValue ("time", "Run timing test", timing);
554  cmd.Parse (argc, argv);
555 
556  Dictionary dict;
557  dict.Add ( Collider ("FNV1a",
558  Hasher ( Create<Hash::Function::Fnv1a> () ),
560  dict.Add ( Collider ("FNV1a",
561  Hasher ( Create<Hash::Function::Fnv1a> () ),
563 
564  dict.Add ( Collider ("Murmur3",
565  Hasher ( Create<Hash::Function::Murmur3> () ),
567  dict.Add ( Collider ("Murmur3",
568  Hasher ( Create<Hash::Function::Murmur3> () ),
570 
571  files.ReadInto (dict);
572 
573  dict.Report ();
574 
575  if (timing)
576  {
577  dict.Time ();
578  } // if (timing)
579 
580 
581 } // main
Use 32-bit hash function.
std::vector< std::pair< uint64_t, std::string > > collision_t
Collision map of subsequent instances.
void Add(const std::string phrase)
Add a string to the dictionary.
def start()
Definition: core.py:1482
void TimeOne(const int hindex)
Time and report the execution of one hash across the entire Dictionary.
std::string GetName() const
#define NS_LOG_COMPONENT_DEFINE(name)
Define a Log component with a specific name.
Definition: log.h:201
void Report() const
Print the collisions found.
void Time()
Report the execution time of each hash across the entire Dictionary.
void ReportExpectedCollisions() const
Report the expected number of collisions.
bool Add(const std::string file)
CommandLine callback function to add a file argument to the list.
bool Add(const std::string phrase)
Add a string to the Collider.
unsigned long m_nphrases
Number of strings hashed.
void Usage(const std::string usage)
Supply the program usage and documentation.
Definition: command-line.cc:95
Word list and hashers to test.
Collider(const std::string name, Hasher hash, const enum Bits bits)
Constructor.
uint32_t GetHash32(const char *buffer, const size_t size)
Compute 32-bit hash of a byte buffer.
Definition: hash.h:239
Callback< R > MakeCallback(R(T::*memPtr)(void), OBJ objPtr)
Definition: callback.h:1296
std::vector< std::string > m_words
List of unique words.
Use 64-bit hash function.
Parse command-line arguments.
Definition: command-line.h:201
void ReadInto(Dictionary &dict)
Add phrases from the files into the dict.
Keep track of collisions.
std::string m_name
Name of this hash.
Bits
The size of hash function being tested.
Hasher & clear(void)
Restore initial state.
Definition: hash.cc:48
Every class exported by the ns3 library is enclosed in the ns3 namespace.
Source word list files.
std::vector< std::string > m_files
List of word files to use.
void Report() const
Print the collisions for each Collider.
enum Bits m_bits
Hash function.
std::vector< Collider > m_hashes
List of hash Colliders.
Namespace for hasher-example.
void Add(Collider c)
Add a Collider containing a hash function.
void AddValue(const std::string &name, const std::string &help, T &value)
Add a program argument, assigning to POD.
Definition: command-line.h:491
collision_t m_coll
The list of collisions.
void Parse(int argc, char *argv[])
Parse the program arguments.
hashdict_t m_dict
The dictionary map, indexed by hash.
uint64_t GetHash64(const char *buffer, const size_t size)
Compute 64-bit hash of a byte buffer.
Definition: hash.h:247
std::map< uint64_t, std::string > hashdict_t
Hashed dictionary of first instance of each hash.
Generic Hash function interface.
Definition: hash.h:87
uint64_t GetHash(const std::string phrase)
Get the appropriate hash value.