A Discrete-Event Network Simulator
API
Loading...
Searching...
No Matches
hash-example.cc
Go to the documentation of this file.
1/*
2 * This program is free software; you can redistribute it and/or modify
3 * it under the terms of the GNU General Public License version 2 as
4 * published by the Free Software Foundation;
5 *
6 * This program is distributed in the hope that it will be useful,
7 * but WITHOUT ANY WARRANTY; without even the implied warranty of
8 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9 * GNU General Public License for more details.
10 *
11 * You should have received a copy of the GNU General Public License
12 * along with this program; if not, write to the Free Software
13 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
14 */
15
16#include "ns3/core-module.h"
17#include "ns3/hash.h"
18
19#include <algorithm> // find
20#include <climits> // CHAR_BIT
21#include <fstream>
22#include <iomanip>
23#include <iostream>
24#include <map>
25#include <vector>
26
102namespace ns3
103{
104
106
107namespace Hash
108{
109
114namespace Example
115{
116
121{
122 public:
123 std::string m_name;
127 enum Bits
128 {
130 Bits64
131 };
132
140 Collider(const std::string name, Hasher hash, const Bits bits)
141 : m_name(name),
142 m_hash(hash),
143 m_bits(bits)
144 {
145 }
146
153 bool Add(const std::string phrase)
154 {
155 uint64_t h = GetHash(phrase);
156
157 // Check for collisions
158 if (m_dict.count(h) > 0)
159 {
160 // we've seen this hash before, why?
161 if (phrase == m_dict[h])
162 {
163 // duplicate phrase
164 return false;
165 }
166
167 // new phrase generating a real collision
168 // alphabetize
169 if (m_dict[h] < phrase)
170 {
171 m_coll.emplace_back(h, phrase);
172 }
173 else
174 {
175 m_coll.emplace_back(h, m_dict[h]);
176 m_dict[h] = phrase;
177 }
178 }
179 else
180 {
181 // Insert new hash
182 m_dict.insert(std::make_pair(h, phrase));
183 }
184 return true;
185 } // Add ()
186
190 std::string GetName() const
191 {
192 std::string name = m_name;
193
194 switch (m_bits)
195 {
196 case Bits32:
197 name += " (32-bit version)";
198 break;
199 case Bits64:
200 name += " (64-bit version)";
201 break;
202 default:
203 name += " (unknown!?!)";
204 }
205 return name;
206 }
207
209 void Report() const
210 {
211 std::cout << std::endl;
212
213 std::cout << GetName() << ": " << m_coll.size() << " collisions:" << std::endl;
214 for (const auto& collision : m_coll)
215 {
216 uint64_t h = collision.first;
217
218 std::cout << std::setfill('0') << std::hex << std::setw(8) << h << std::dec
219 << std::setfill(' ') << " " << std::setw(20) << std::left
220 << m_dict.find(h)->second << collision.second << std::right << std::endl;
221 }
222 } // Report ()
223
224 private:
231 uint64_t GetHash(const std::string phrase)
232 {
233 m_hash.clear();
234 uint64_t h = 0;
235
236 if (m_bits == Bits32)
237 {
238 h = m_hash.GetHash32(phrase);
239 }
240 else
241 {
242 h = m_hash.GetHash64(phrase);
243 }
244 return h;
245 }
246
249
251 typedef std::map<uint64_t, std::string> hashdict_t;
252
255
257 typedef std::vector<std::pair<uint64_t, std::string>> collision_t;
258
261
262}; // class Collider
263
268{
269 public:
272 : m_nphrases(0)
273 {
274 m_words.reserve(320000);
275 }
276
282 void Add(Collider c)
283 {
284 m_hashes.push_back(c);
285 }
286
292 void Add(const std::string phrase)
293 {
294 if (phrase.empty())
295 {
296 return;
297 }
298
299 int newPhrases = 0;
300 for (auto& collider : m_hashes)
301 {
302 newPhrases += collider.Add(phrase);
303 }
304
305 if (newPhrases)
306 {
307 ++m_nphrases;
308 m_words.push_back(phrase);
309 }
310 } // Add ()
311
350 {
351 // Expected number of collisions
352 //
353 // Number of buckets = k = 2^bits
354 long double k32 = 0xFFFFFFFF;
355 long double k64 = static_cast<long double>(0xFFFFFFFFFFFFFFFFULL);
356
357 long double n = m_nphrases;
358 long double Ec32 = n * (n - 1) / (2 * k32) * (1 - (n - 2) / (3 * k32));
359 long double Ec64 = n * (n - 1) / (2 * k64) * (1 - (n - 2) / (3 * k64));
360
361 // Output collisions
362 std::cout << "" << std::endl;
363 std::cout << "Number of words or phrases: " << n << std::endl;
364 std::cout << "Expected number of collisions: (32-bit table) " << Ec32 << std::endl;
365 std::cout << "Expected number of collisions: (64-bit table) " << Ec64 << std::endl;
366 } // ReportExpectedCollisions
367
369 void Report() const
370 {
372
373 for (const auto& collider : m_hashes)
374 {
375 collider.Report();
376 }
377 } // Report ()
378
384 void TimeOne(const Collider& collider)
385 {
386 // Hashing speed
387 uint32_t reps = 100;
388 Hasher h = collider.m_hash;
389 int start = clock();
390 for (const auto& word : m_words)
391 {
392 for (uint32_t i = 0; i < reps; ++i)
393 {
394 h.clear().GetHash32(word);
395 }
396 }
397 int stop = clock();
398 double delta = stop - start;
399 double per = 1e9 * delta / (m_nphrases * reps * CLOCKS_PER_SEC);
400
401 std::cout << std::left << std::setw(32) << collider.GetName() << std::right << std::setw(10)
402 << m_nphrases << std::setw(10) << reps << std::setw(10) << stop - start
403 << std::setw(12) << per << std::endl;
404
405 } // TimeOne ()
406
408 void Time()
409 {
410 std::cout << "" << std::endl;
411 std::cout << std::left << std::setw(32) << "Hash timing" << std::right << std::setw(10)
412 << "Phrases" << std::setw(10) << "Reps" << std::setw(10) << "Ticks"
413 << std::setw(12) << "ns/hash" << std::endl;
414
415 for (const auto& collider : m_hashes)
416 {
417 TimeOne(collider);
418 }
419 } // Time ()
420
421 private:
422 unsigned long m_nphrases;
423 std::vector<Collider> m_hashes;
424 std::vector<std::string> m_words;
426}; // class Dictionary
427
432{
433 public:
440 bool Add(const std::string& file)
441 {
442 if (std::find(m_files.begin(), m_files.end(), file) == m_files.end())
443 {
444 m_files.push_back(file);
445 }
446
447 return true;
448 }
449
451 static std::string GetDefault()
452 {
453 return "/usr/share/dict/words";
454 }
455
462 {
463 if (m_files.empty())
464 {
465 Add(GetDefault());
466 }
467
468 std::cout << "Hashing the dictionar" << (m_files.size() == 1 ? "y" : "ies") << std::endl;
469
470 for (const auto& dictFile : m_files)
471 {
472 std::cout << "Dictionary file: " << dictFile << std::endl;
473
474 // Find collisions
475
476 // Open the file
477 std::ifstream dictStream;
478 dictStream.open(dictFile);
479 if (!dictStream.is_open())
480 {
481 std::cerr << "Failed to open dictionary file."
482 << "'" << dictFile << "'" << std::endl;
483 continue;
484 }
485
486 while (dictStream.good())
487 {
488 std::string phrase;
489 getline(dictStream, phrase);
490 dict.Add(phrase);
491 } // while
492
493 dictStream.close();
494
495 } // for m_files
496
497 } // ReadInto
498
499 private:
500 std::vector<std::string> m_files;
502}; // class DictFiles
503
504} // namespace Example
505
506} // namespace Hash
507
508} // namespace ns3
509
510using namespace ns3;
511using namespace ns3::Hash::Example;
512
513int
514main(int argc, char* argv[])
515{
516 std::cout << std::endl;
517 std::cout << "Hasher" << std::endl;
518
519 bool timing = false;
520 DictFiles files;
521
522 CommandLine cmd(__FILE__);
523 cmd.Usage("Find hash collisions in the dictionary.");
524 cmd.AddValue("dict",
525 "Dictionary file to hash",
528
529 cmd.AddValue("time", "Run timing test", timing);
530 cmd.Parse(argc, argv);
531
532 Dictionary dict;
533 dict.Add(Collider("FNV1a", Hasher(Create<Hash::Function::Fnv1a>()), Collider::Bits32));
534 dict.Add(Collider("FNV1a", Hasher(Create<Hash::Function::Fnv1a>()), Collider::Bits64));
535
536 dict.Add(Collider("Murmur3", Hasher(Create<Hash::Function::Murmur3>()), Collider::Bits32));
537 dict.Add(Collider("Murmur3", Hasher(Create<Hash::Function::Murmur3>()), Collider::Bits64));
538
539 files.ReadInto(dict);
540
541 dict.Report();
542
543 if (timing)
544 {
545 dict.Time();
546 } // if (timing)
547
548 return 0;
549} // main
Parse command-line arguments.
Definition: command-line.h:232
Keep track of collisions.
Collider(const std::string name, Hasher hash, const Bits bits)
Constructor.
collision_t m_coll
The list of collisions.
std::map< uint64_t, std::string > hashdict_t
Hashed dictionary of first instance of each hash.
uint64_t GetHash(const std::string phrase)
Get the appropriate hash value.
std::string m_name
Name of this hash.
std::vector< std::pair< uint64_t, std::string > > collision_t
Collision map of subsequent instances.
void Report() const
Print the collisions found.
Bits
The size of hash function being tested.
@ Bits64
Use 64-bit hash function.
@ Bits32
Use 32-bit hash function.
bool Add(const std::string phrase)
Add a string to the Collider.
std::string GetName() const
hashdict_t m_dict
The dictionary map, indexed by hash.
Bits m_bits
Hash function.
Source word list files.
std::vector< std::string > m_files
List of word files to use.
bool Add(const std::string &file)
CommandLine callback function to add a file argument to the list.
static std::string GetDefault()
void ReadInto(Dictionary &dict)
Add phrases from the files into the dict.
Word list and hashers to test.
void ReportExpectedCollisions() const
Report the expected number of collisions.
void Add(Collider c)
Add a Collider containing a hash function.
std::vector< std::string > m_words
List of unique words.
std::vector< Collider > m_hashes
List of hash Colliders.
void Report() const
Print the collisions for each Collider.
void TimeOne(const Collider &collider)
Time and report the execution of one hash across the entire Dictionary.
unsigned long m_nphrases
Number of strings hashed.
void Add(const std::string phrase)
Add a string to the dictionary.
void Time()
Report the execution time of each hash across the entire Dictionary.
Generic Hash function interface.
Definition: hash.h:87
uint32_t GetHash32(const char *buffer, const std::size_t size)
Compute 32-bit hash of a byte buffer.
Definition: hash.h:236
uint64_t GetHash64(const char *buffer, const std::size_t size)
Compute 64-bit hash of a byte buffer.
Definition: hash.h:243
Hasher & clear()
Restore initial state.
Definition: hash.cc:56
#define NS_LOG_COMPONENT_DEFINE(name)
Define a Log component with a specific name.
Definition: log.h:202
Namespace for hasher-example.
Every class exported by the ns3 library is enclosed in the ns3 namespace.
Callback< R, Args... > MakeCallback(R(T::*memPtr)(Args...), OBJ objPtr)
Build Callbacks for class method members which take varying numbers of arguments and potentially retu...
Definition: callback.h:702