A Discrete-Event Network Simulator
API
hash-example.cc
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License version 2 as
4  * published by the Free Software Foundation;
5  *
6  * This program is distributed in the hope that it will be useful,
7  * but WITHOUT ANY WARRANTY; without even the implied warranty of
8  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
9  * GNU General Public License for more details.
10  *
11  * You should have received a copy of the GNU General Public License
12  * along with this program; if not, write to the Free Software
13  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
14  */
15 
16 #include "ns3/core-module.h"
17 #include "ns3/hash.h"
18 
19 #include <algorithm> // find
20 #include <climits> // CHAR_BIT
21 #include <fstream>
22 #include <iomanip>
23 #include <iostream>
24 #include <map>
25 #include <vector>
26 
102 namespace ns3
103 {
104 
105 NS_LOG_COMPONENT_DEFINE("Hasher");
106 
107 namespace Hash
108 {
109 
114 namespace Example
115 {
116 
120 class Collider
121 {
122  public:
123  std::string m_name;
127  enum Bits
128  {
130  Bits64
131  };
132 
140  Collider(const std::string name, Hasher hash, const Bits bits)
141  : m_name(name),
142  m_hash(hash),
143  m_bits(bits)
144  {
145  }
146 
153  bool Add(const std::string phrase)
154  {
155  uint64_t h = GetHash(phrase);
156 
157  // Check for collisions
158  if (m_dict.count(h) > 0)
159  {
160  // we've seen this hash before, why?
161  if (phrase == m_dict[h])
162  {
163  // duplicate phrase
164  return false;
165  }
166 
167  // new phrase generating a real collision
168  // alphabetize
169  if (m_dict[h] < phrase)
170  {
171  m_coll.emplace_back(h, phrase);
172  }
173  else
174  {
175  m_coll.emplace_back(h, m_dict[h]);
176  m_dict[h] = phrase;
177  }
178  }
179  else
180  {
181  // Insert new hash
182  m_dict.insert(std::make_pair(h, phrase));
183  }
184  return true;
185  } // Add ()
186 
190  std::string GetName() const
191  {
192  std::string name = m_name;
193 
194  switch (m_bits)
195  {
196  case Bits32:
197  name += " (32-bit version)";
198  break;
199  case Bits64:
200  name += " (64-bit version)";
201  break;
202  default:
203  name += " (unknown!?!)";
204  }
205  return name;
206  }
207 
209  void Report() const
210  {
211  std::cout << std::endl;
212 
213  std::cout << GetName() << ": " << m_coll.size() << " collisions:" << std::endl;
214  for (const auto& collision : m_coll)
215  {
216  uint64_t h = collision.first;
217 
218  std::cout << std::setfill('0') << std::hex << std::setw(8) << h << std::dec
219  << std::setfill(' ') << " " << std::setw(20) << std::left
220  << m_dict.find(h)->second << collision.second << std::right << std::endl;
221  }
222  } // Report ()
223 
224  private:
231  uint64_t GetHash(const std::string phrase)
232  {
233  m_hash.clear();
234  uint64_t h = 0;
235 
236  if (m_bits == Bits32)
237  {
238  h = m_hash.GetHash32(phrase);
239  }
240  else
241  {
242  h = m_hash.GetHash64(phrase);
243  }
244  return h;
245  }
246 
249 
251  typedef std::map<uint64_t, std::string> hashdict_t;
252 
255 
257  typedef std::vector<std::pair<uint64_t, std::string>> collision_t;
258 
261 
262 }; // class Collider
263 
268 {
269  public:
272  : m_nphrases(0)
273  {
274  m_words.reserve(320000);
275  }
276 
282  void Add(Collider c)
283  {
284  m_hashes.push_back(c);
285  }
286 
292  void Add(const std::string phrase)
293  {
294  if (phrase.empty())
295  {
296  return;
297  }
298 
299  bool newPhrases = false;
300  for (auto& collider : m_hashes)
301  {
302  newPhrases |= collider.Add(phrase);
303  }
304 
305  if (newPhrases)
306  {
307  ++m_nphrases;
308  m_words.push_back(phrase);
309  }
310  } // Add ()
311 
350  {
351  // Expected number of collisions
352  //
353  // Number of buckets = k = 2^bits
354  long double k32 = 0xFFFFFFFF;
355  auto k64 = static_cast<long double>(0xFFFFFFFFFFFFFFFFULL);
356 
357  long double n = m_nphrases;
358  long double Ec32 = n * (n - 1) / (2 * k32) * (1 - (n - 2) / (3 * k32));
359  long double Ec64 = n * (n - 1) / (2 * k64) * (1 - (n - 2) / (3 * k64));
360 
361  // Output collisions
362  std::cout << "" << std::endl;
363  std::cout << "Number of words or phrases: " << n << std::endl;
364  std::cout << "Expected number of collisions: (32-bit table) " << Ec32 << std::endl;
365  std::cout << "Expected number of collisions: (64-bit table) " << Ec64 << std::endl;
366  } // ReportExpectedCollisions
367 
369  void Report() const
370  {
372 
373  for (const auto& collider : m_hashes)
374  {
375  collider.Report();
376  }
377  } // Report ()
378 
384  void TimeOne(const Collider& collider)
385  {
386  // Hashing speed
387  uint32_t reps = 100;
388  Hasher h = collider.m_hash;
389  int start = clock();
390  for (const auto& word : m_words)
391  {
392  for (uint32_t i = 0; i < reps; ++i)
393  {
394  h.clear().GetHash32(word);
395  }
396  }
397  int stop = clock();
398  double delta = stop - start;
399  double per = 1e9 * delta / (m_nphrases * reps * CLOCKS_PER_SEC);
400 
401  std::cout << std::left << std::setw(32) << collider.GetName() << std::right << std::setw(10)
402  << m_nphrases << std::setw(10) << reps << std::setw(10) << stop - start
403  << std::setw(12) << per << std::endl;
404 
405  } // TimeOne ()
406 
408  void Time()
409  {
410  std::cout << "" << std::endl;
411  std::cout << std::left << std::setw(32) << "Hash timing" << std::right << std::setw(10)
412  << "Phrases" << std::setw(10) << "Reps" << std::setw(10) << "Ticks"
413  << std::setw(12) << "ns/hash" << std::endl;
414 
415  for (const auto& collider : m_hashes)
416  {
417  TimeOne(collider);
418  }
419  } // Time ()
420 
421  private:
422  unsigned long m_nphrases;
423  std::vector<Collider> m_hashes;
424  std::vector<std::string> m_words;
426 }; // class Dictionary
427 
432 {
433  public:
440  bool Add(const std::string& file)
441  {
442  if (std::find(m_files.begin(), m_files.end(), file) == m_files.end())
443  {
444  m_files.push_back(file);
445  }
446 
447  return true;
448  }
449 
451  static std::string GetDefault()
452  {
453  return "/usr/share/dict/words";
454  }
455 
461  void ReadInto(Dictionary& dict)
462  {
463  if (m_files.empty())
464  {
465  Add(GetDefault());
466  }
467 
468  std::cout << "Hashing the dictionar" << (m_files.size() == 1 ? "y" : "ies") << std::endl;
469 
470  for (const auto& dictFile : m_files)
471  {
472  std::cout << "Dictionary file: " << dictFile << std::endl;
473 
474  // Find collisions
475 
476  // Open the file
477  std::ifstream dictStream;
478  dictStream.open(dictFile);
479  if (!dictStream.is_open())
480  {
481  std::cerr << "Failed to open dictionary file."
482  << "'" << dictFile << "'" << std::endl;
483  continue;
484  }
485 
486  while (dictStream.good())
487  {
488  std::string phrase;
489  getline(dictStream, phrase);
490  dict.Add(phrase);
491  } // while
492 
493  dictStream.close();
494 
495  } // for m_files
496 
497  } // ReadInto
498 
499  private:
500  std::vector<std::string> m_files;
502 }; // class DictFiles
503 
504 } // namespace Example
505 
506 } // namespace Hash
507 
508 } // namespace ns3
509 
510 using namespace ns3;
511 using namespace ns3::Hash::Example;
512 
513 int
514 main(int argc, char* argv[])
515 {
516  std::cout << std::endl;
517  std::cout << "Hasher" << std::endl;
518 
519  bool timing = false;
520  DictFiles files;
521 
522  CommandLine cmd(__FILE__);
523  cmd.Usage("Find hash collisions in the dictionary.");
524  cmd.AddValue("dict",
525  "Dictionary file to hash",
526  MakeCallback(&DictFiles::Add, &files),
528 
529  cmd.AddValue("time", "Run timing test", timing);
530  cmd.Parse(argc, argv);
531 
532  Dictionary dict;
533  dict.Add(Collider("FNV1a", Hasher(Create<Hash::Function::Fnv1a>()), Collider::Bits32));
534  dict.Add(Collider("FNV1a", Hasher(Create<Hash::Function::Fnv1a>()), Collider::Bits64));
535 
536  dict.Add(Collider("Murmur3", Hasher(Create<Hash::Function::Murmur3>()), Collider::Bits32));
537  dict.Add(Collider("Murmur3", Hasher(Create<Hash::Function::Murmur3>()), Collider::Bits64));
538 
539  files.ReadInto(dict);
540 
541  dict.Report();
542 
543  if (timing)
544  {
545  dict.Time();
546  } // if (timing)
547 
548  return 0;
549 } // main
Parse command-line arguments.
Definition: command-line.h:232
Keep track of collisions.
Collider(const std::string name, Hasher hash, const Bits bits)
Constructor.
collision_t m_coll
The list of collisions.
std::map< uint64_t, std::string > hashdict_t
Hashed dictionary of first instance of each hash.
uint64_t GetHash(const std::string phrase)
Get the appropriate hash value.
std::string m_name
Name of this hash.
std::vector< std::pair< uint64_t, std::string > > collision_t
Collision map of subsequent instances.
void Report() const
Print the collisions found.
Bits
The size of hash function being tested.
@ Bits64
Use 64-bit hash function.
@ Bits32
Use 32-bit hash function.
bool Add(const std::string phrase)
Add a string to the Collider.
std::string GetName() const
hashdict_t m_dict
The dictionary map, indexed by hash.
Bits m_bits
Hash function.
Source word list files.
std::vector< std::string > m_files
List of word files to use.
bool Add(const std::string &file)
CommandLine callback function to add a file argument to the list.
static std::string GetDefault()
void ReadInto(Dictionary &dict)
Add phrases from the files into the dict.
Word list and hashers to test.
void ReportExpectedCollisions() const
Report the expected number of collisions.
void Add(Collider c)
Add a Collider containing a hash function.
std::vector< std::string > m_words
List of unique words.
std::vector< Collider > m_hashes
List of hash Colliders.
void Report() const
Print the collisions for each Collider.
void TimeOne(const Collider &collider)
Time and report the execution of one hash across the entire Dictionary.
unsigned long m_nphrases
Number of strings hashed.
void Add(const std::string phrase)
Add a string to the dictionary.
void Time()
Report the execution time of each hash across the entire Dictionary.
Generic Hash function interface.
Definition: hash.h:87
uint32_t GetHash32(const char *buffer, const std::size_t size)
Compute 32-bit hash of a byte buffer.
Definition: hash.h:236
uint64_t GetHash64(const char *buffer, const std::size_t size)
Compute 64-bit hash of a byte buffer.
Definition: hash.h:243
Hasher & clear()
Restore initial state.
Definition: hash.cc:56
#define NS_LOG_COMPONENT_DEFINE(name)
Define a Log component with a specific name.
Definition: log.h:202
std::size_t hash(const BasicJsonType &j)
hash a JSON value
Definition: json.h:4680
Namespace for hasher-example.
Every class exported by the ns3 library is enclosed in the ns3 namespace.
Callback< R, Args... > MakeCallback(R(T::*memPtr)(Args...), OBJ objPtr)
Build Callbacks for class method members which take varying numbers of arguments and potentially retu...
Definition: callback.h:704
cmd
Definition: second.py:40