A Discrete-Event Network Simulator
API
hash-example.cc
Go to the documentation of this file.
1/* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
2/*
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License version 2 as
5 * published by the Free Software Foundation;
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * along with this program; if not, write to the Free Software
14 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15 */
16
17#include <algorithm> // find
18#include <climits> // CHAR_BIT
19#include <fstream>
20#include <iostream>
21#include <iomanip>
22#include <map>
23#include <vector>
24
25#include "ns3/core-module.h"
26#include "ns3/hash.h"
27
103namespace ns3 {
104
105NS_LOG_COMPONENT_DEFINE ("Hasher");
106
107namespace Hash {
108
113namespace Example {
114
119{
120
121public:
122 std::string m_name;
126 enum Bits
127 {
129 Bits64
130 };
131
139 Collider (const std::string name, Hasher hash, const enum Bits bits)
140 : m_name (name),
141 m_hash (hash),
142 m_bits (bits)
143 { }
144
151 bool Add (const std::string phrase)
152 {
153 uint64_t h = GetHash (phrase);
154
155 // Check for collisions
156 if (m_dict.count (h) > 0)
157 {
158 // we've seen this hash before, why?
159 if (phrase == m_dict[h])
160 {
161 // duplicate phrase
162 return false;
163 }
164
165 // new phrase generating a real collision
166 // alphabetize
167 if ( m_dict[h] < phrase)
168 {
169 m_coll.push_back (std::make_pair (h, phrase));
170 }
171 else
172 {
173 m_coll.push_back (std::make_pair (h, m_dict[h]));
174 m_dict[h] = phrase;
175 }
176 }
177 else
178 {
179 // Insert new hash
180 m_dict.insert (std::make_pair (h, phrase));
181 }
182 return true;
183 } // Add ()
184
188 std::string GetName () const
189 {
190 std::string name = m_name;
191
192 switch (m_bits)
193 {
194 /* *NS_CHECK_STYLE_OFF* */
195 case Bits32: name += " (32-bit version)"; break;
196 case Bits64: name += " (64-bit version)"; break;
197 default: name += " (unknown!?!)";
198 /* *NS_CHECK_STYLE_ON* */
199 }
200 return name;
201 }
202
204 void Report () const
205 {
206 std::cout << std::endl;
207
208 std::cout << GetName () << ": " << m_coll.size () << " collisions:"
209 << std::endl;
210 for (auto collision : m_coll)
211 {
212 uint64_t h = collision.first;
213
214 std::cout << std::setfill ('0') << std::hex << std::setw (8) << h
215 << std::dec << std::setfill (' ') << " "
216 << std::setw (20) << std::left
217 << m_dict.find (h)->second
218 << collision.second
219 << std::right
220 << std::endl;
221 }
222 } // Report ()
223
224private:
225
232 uint64_t GetHash (const std::string phrase)
233 {
234 m_hash.clear ();
235 uint64_t h = 0;
236
237 if (m_bits == Bits32)
238 {
239 h = m_hash.GetHash32 (phrase);
240 }
241 else
242 {
243 h = m_hash.GetHash64 (phrase);
244 }
245 return h;
246 }
247
250
252 typedef std::map <uint64_t, std::string> hashdict_t;
253
256
258 typedef std::vector < std::pair<uint64_t, std::string> > collision_t;
259
262
263}; // class Collider
264
265
270{
271public:
274 : m_nphrases (0)
275 {
276 m_words.reserve (320000);
277 }
278
284 void Add (Collider c)
285 {
286 m_hashes.push_back (c);
287 }
288
294 void Add (const std::string phrase)
295 {
296 if (phrase.size () == 0)
297 {
298 return;
299 }
300
301 int newPhrases = 0;
302 for (auto & collider : m_hashes)
303 {
304 newPhrases += collider.Add (phrase);
305 }
306
307 if (newPhrases)
308 {
309 ++m_nphrases;
310 m_words.push_back (phrase);
311 }
312 } // Add ()
313
352 {
353 // Expected number of collisions
354 //
355 // Number of buckets = k = 2^bits
356 long double k32 = 0xFFFFFFFF;
357 long double k64 = static_cast<long double> (0xFFFFFFFFFFFFFFFFULL);
358
359 long double n = m_nphrases;
360 long double Ec32 = n * (n - 1) / ( 2 * k32) * (1 - (n - 2) / (3 * k32));
361 long double Ec64 = n * (n - 1) / ( 2 * k64) * (1 - (n - 2) / (3 * k64));
362
363 // Output collisions
364 std::cout << "" << std::endl;
365 std::cout << "Number of words or phrases: " << n << std::endl;
366 std::cout << "Expected number of collisions: (32-bit table) " << Ec32
367 << std::endl;
368 std::cout << "Expected number of collisions: (64-bit table) " << Ec64
369 << std::endl;
370 } // ReportExpectedCollisions
371
372
374 void Report () const
375 {
377
378 for (auto collider : m_hashes)
379 {
380 collider.Report ();
381 }
382 } // Report ()
383
389 void TimeOne (const Collider & collider)
390 {
391 // Hashing speed
392 uint32_t reps = 100;
393 Hasher h = collider.m_hash;
394 int start = clock ();
395 for (auto const & word : m_words)
396 {
397 for (uint32_t i = 0; i < reps; ++i)
398 {
399 h.clear ().GetHash32 (word);
400 }
401 }
402 int stop = clock ();
403 double delta = stop - start;
404 double per = 1e9 * delta / (m_nphrases * reps * CLOCKS_PER_SEC);
405
406 std::cout << std::left
407 << std::setw (32) << collider.GetName ()
408 << std::right
409 << std::setw (10) << m_nphrases
410 << std::setw (10) << reps
411 << std::setw (10) << stop - start
412 << std::setw (12) << per
413 << std::endl;
414
415 } // TimeOne ()
416
418 void Time ()
419 {
420 std::cout << "" << std::endl;
421 std::cout << std::left
422 << std::setw (32) << "Hash timing"
423 << std::right
424 << std::setw (10) << "Phrases"
425 << std::setw (10) << "Reps"
426 << std::setw (10) << "Ticks"
427 << std::setw (12) << "ns/hash"
428 << std::endl;
429
430 for (auto const & collider : m_hashes)
431 {
432 TimeOne (collider);
433 }
434 } // Time ()
435
436private:
437 unsigned long m_nphrases;
438 std::vector <Collider> m_hashes;
439 std::vector <std::string> m_words;
441}; // class Dictionary
442
443
448{
449
450public:
451
458 bool Add (const std::string file)
459 {
460 if (std::find (m_files.begin (), m_files.end (), file) == m_files.end ())
461 {
462 m_files.push_back (file);
463 }
464
465 return true;
466 }
467
469 static std::string GetDefault (void)
470 {
471 return "/usr/share/dict/words";
472 }
473
479 void ReadInto (Dictionary & dict)
480 {
481 if (m_files.size () == 0)
482 {
483 Add (GetDefault ());
484 }
485
486 std::cout << "Hashing the dictionar"
487 << (m_files.size () == 1 ? "y" : "ies")
488 << std::endl;
489
490 for (auto dictFile : m_files)
491 {
492 std::cout << "Dictionary file: " << dictFile << std::endl;
493
494 // Find collisions
495
496 // Open the file
497 std::ifstream dictStream;
498 dictStream.open (dictFile.c_str () );
499 if (!dictStream.is_open () )
500 {
501 std::cerr << "Failed to open dictionary file."
502 << "'" << dictFile << "'"
503 << std::endl;
504 continue;
505 }
506
507 while (dictStream.good () )
508 {
509 std::string phrase;
510 getline (dictStream, phrase);
511 dict.Add (phrase);
512 } // while
513
514 dictStream.close ();
515
516 } // for m_files
517
518 } // ReadInto
519
520private:
521 std::vector <std::string> m_files;
523}; // class DictFiles
524
525} // namespace Example
526
527} // namespace Hash
528
529} // namespace ns3
530
531
532using namespace ns3;
533using namespace ns3::Hash::Example;
534
535int
536main (int argc, char *argv[])
537{
538 std::cout << std::endl;
539 std::cout << "Hasher" << std::endl;
540
541 bool timing = false;
542 DictFiles files;
543
544 CommandLine cmd (__FILE__);
545 cmd.Usage ("Find hash collisions in the dictionary.");
546 cmd.AddValue ("dict", "Dictionary file to hash",
547 MakeCallback (&DictFiles::Add,
548 &files),
550
551 cmd.AddValue ("time", "Run timing test", timing);
552 cmd.Parse (argc, argv);
553
554 Dictionary dict;
555 dict.Add ( Collider ("FNV1a",
556 Hasher ( Create<Hash::Function::Fnv1a> () ),
557 Collider::Bits32));
558 dict.Add ( Collider ("FNV1a",
559 Hasher ( Create<Hash::Function::Fnv1a> () ),
560 Collider::Bits64));
561
562 dict.Add ( Collider ("Murmur3",
563 Hasher ( Create<Hash::Function::Murmur3> () ),
564 Collider::Bits32));
565 dict.Add ( Collider ("Murmur3",
566 Hasher ( Create<Hash::Function::Murmur3> () ),
567 Collider::Bits64));
568
569 files.ReadInto (dict);
570
571 dict.Report ();
572
573 if (timing)
574 {
575 dict.Time ();
576 } // if (timing)
577
578
579} // main
Parse command-line arguments.
Definition: command-line.h:229
Keep track of collisions.
collision_t m_coll
The list of collisions.
std::map< uint64_t, std::string > hashdict_t
Hashed dictionary of first instance of each hash.
uint64_t GetHash(const std::string phrase)
Get the appropriate hash value.
std::string m_name
Name of this hash.
void Report() const
Print the collisions found.
Collider(const std::string name, Hasher hash, const enum Bits bits)
Constructor.
Bits
The size of hash function being tested.
@ Bits64
Use 64-bit hash function.
@ Bits32
Use 32-bit hash function.
std::vector< std::pair< uint64_t, std::string > > collision_t
Collision map of subsequent instances.
bool Add(const std::string phrase)
Add a string to the Collider.
enum Bits m_bits
Hash function.
std::string GetName() const
hashdict_t m_dict
The dictionary map, indexed by hash.
Source word list files.
std::vector< std::string > m_files
List of word files to use.
bool Add(const std::string file)
CommandLine callback function to add a file argument to the list.
void ReadInto(Dictionary &dict)
Add phrases from the files into the dict.
static std::string GetDefault(void)
Word list and hashers to test.
void ReportExpectedCollisions() const
Report the expected number of collisions.
void Add(Collider c)
Add a Collider containing a hash function.
std::vector< std::string > m_words
List of unique words.
std::vector< Collider > m_hashes
List of hash Colliders.
void Report() const
Print the collisions for each Collider.
void TimeOne(const Collider &collider)
Time and report the execution of one hash across the entire Dictionary.
unsigned long m_nphrases
Number of strings hashed.
void Add(const std::string phrase)
Add a string to the dictionary.
void Time()
Report the execution time of each hash across the entire Dictionary.
Generic Hash function interface.
Definition: hash.h:88
uint32_t GetHash32(const char *buffer, const std::size_t size)
Compute 32-bit hash of a byte buffer.
Definition: hash.h:239
uint64_t GetHash64(const char *buffer, const std::size_t size)
Compute 64-bit hash of a byte buffer.
Definition: hash.h:247
Hasher & clear(void)
Restore initial state.
Definition: hash.cc:55
std::string GetDefault(const T &val)
Helper to specialize CommandLine::UserItem::GetDefault() on types needing special handling.
Definition: command-line.h:721
#define NS_LOG_COMPONENT_DEFINE(name)
Define a Log component with a specific name.
Definition: log.h:205
Namespace for hasher-example.
Every class exported by the ns3 library is enclosed in the ns3 namespace.
Callback< R, Ts... > MakeCallback(R(T::*memPtr)(Ts...), OBJ objPtr)
Build Callbacks for class method members which take varying numbers of arguments and potentially retu...
Definition: callback.h:1648
cmd
Definition: second.py:35
def start()
Definition: core.py:1853