A Discrete-Event Network Simulator
API
hash-example.cc
Go to the documentation of this file.
1 /* -*- Mode:C++; c-file-style:"gnu"; indent-tabs-mode:nil; -*- */
2 /*
3  * This program is free software; you can redistribute it and/or modify
4  * it under the terms of the GNU General Public License version 2 as
5  * published by the Free Software Foundation;
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * along with this program; if not, write to the Free Software
14  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
15  */
16 
17 #include <algorithm> // find
18 #include <climits> // CHAR_BIT
19 #include <fstream>
20 #include <iostream>
21 #include <iomanip>
22 #include <map>
23 #include <vector>
24 
25 #include "ns3/core-module.h"
26 #include "ns3/hash.h"
27 
103 namespace ns3 {
104 
105 NS_LOG_COMPONENT_DEFINE ("Hasher");
106 
107 namespace Hash {
108 
113 namespace Example {
114 
118 class Collider
119 {
120 
121 public:
122  std::string m_name;
126  enum Bits
127  {
129  Bits64
130  };
131 
139  Collider (const std::string name, Hasher hash, const enum Bits bits)
140  : m_name (name),
141  m_hash (hash),
142  m_bits (bits)
143  { }
144 
151  bool Add (const std::string phrase)
152  {
153  uint64_t h = GetHash (phrase);
154 
155  // Check for collisions
156  if (m_dict.count (h) > 0)
157  {
158  // we've seen this hash before, why?
159  if (phrase == m_dict[h])
160  {
161  // duplicate phrase
162  return false;
163  }
164 
165  // new phrase generating a real collision
166  // alphabetize
167  if ( m_dict[h] < phrase)
168  {
169  m_coll.push_back (std::make_pair (h, phrase));
170  }
171  else
172  {
173  m_coll.push_back (std::make_pair (h, m_dict[h]));
174  m_dict[h] = phrase;
175  }
176  }
177  else
178  {
179  // Insert new hash
180  m_dict.insert (std::make_pair (h, phrase));
181  }
182  return true;
183  } // Add ()
184 
188  std::string GetName () const
189  {
190  std::string name = m_name;
191 
192  switch (m_bits)
193  {
194  /* *NS_CHECK_STYLE_OFF* */
195  case Bits32: name += " (32-bit version)"; break;
196  case Bits64: name += " (64-bit version)"; break;
197  default: name += " (unknown!?!)";
198  /* *NS_CHECK_STYLE_ON* */
199  }
200  return name;
201  }
202 
204  void Report () const
205  {
206  std::cout << std::endl;
207 
208  std::cout << GetName () << ": " << m_coll.size () << " collisions:"
209  << std::endl;
210  for (auto collision : m_coll)
211  {
212  uint64_t h = collision.first;
213 
214  std::cout << std::setfill ('0') << std::hex << std::setw (8) << h
215  << std::dec << std::setfill (' ') << " "
216  << std::setw (20) << std::left
217  << m_dict.find (h)->second
218  << collision.second
219  << std::right
220  << std::endl;
221  }
222  } // Report ()
223 
224 private:
225 
232  uint64_t GetHash (const std::string phrase)
233  {
234  m_hash.clear ();
235  uint64_t h = 0;
236 
237  if (m_bits == Bits32)
238  {
239  h = m_hash.GetHash32 (phrase);
240  }
241  else
242  {
243  h = m_hash.GetHash64 (phrase);
244  }
245  return h;
246  }
247 
249  enum Bits m_bits;
250 
252  typedef std::map <uint64_t, std::string> hashdict_t;
253 
256 
258  typedef std::vector < std::pair<uint64_t, std::string> > collision_t;
259 
262 
263 }; // class Collider
264 
265 
270 {
271 public:
274  : m_nphrases (0)
275  {
276  m_words.reserve (320000);
277  }
278 
284  void Add (Collider c)
285  {
286  m_hashes.push_back (c);
287  }
288 
294  void Add (const std::string phrase)
295  {
296  if (phrase.size () == 0)
297  {
298  return;
299  }
300 
301  int newPhrases = 0;
302  for (auto & collider : m_hashes)
303  {
304  newPhrases += collider.Add (phrase);
305  }
306 
307  if (newPhrases)
308  {
309  ++m_nphrases;
310  m_words.push_back (phrase);
311  }
312  } // Add ()
313 
352  {
353  // Expected number of collisions
354  //
355  // Number of buckets = k = 2^bits
356  long double k32 = 0xFFFFFFFF;
357  long double k64 = static_cast<long double> (0xFFFFFFFFFFFFFFFFULL);
358 
359  long double n = m_nphrases;
360  long double Ec32 = n * (n - 1) / ( 2 * k32) * (1 - (n - 2) / (3 * k32));
361  long double Ec64 = n * (n - 1) / ( 2 * k64) * (1 - (n - 2) / (3 * k64));
362 
363  // Output collisions
364  std::cout << "" << std::endl;
365  std::cout << "Number of words or phrases: " << n << std::endl;
366  std::cout << "Expected number of collisions: (32-bit table) " << Ec32
367  << std::endl;
368  std::cout << "Expected number of collisions: (64-bit table) " << Ec64
369  << std::endl;
370  } // ReportExpectedCollisions
371 
372 
374  void Report () const
375  {
377 
378  for (auto collider : m_hashes)
379  {
380  collider.Report ();
381  }
382  } // Report ()
383 
389  void TimeOne (const Collider & collider)
390  {
391  // Hashing speed
392  uint32_t reps = 100;
393  Hasher h = collider.m_hash;
394  int start = clock ();
395  for (auto const & word : m_words)
396  {
397  for (uint32_t i = 0; i < reps; ++i)
398  {
399  h.clear ().GetHash32 (word);
400  }
401  }
402  int stop = clock ();
403  double delta = stop - start;
404  double per = 1e9 * delta / (m_nphrases * reps * CLOCKS_PER_SEC);
405 
406  std::cout << std::left
407  << std::setw (32) << collider.GetName ()
408  << std::right
409  << std::setw (10) << m_nphrases
410  << std::setw (10) << reps
411  << std::setw (10) << stop - start
412  << std::setw (12) << per
413  << std::endl;
414 
415  } // TimeOne ()
416 
418  void Time ()
419  {
420  std::cout << "" << std::endl;
421  std::cout << std::left
422  << std::setw (32) << "Hash timing"
423  << std::right
424  << std::setw (10) << "Phrases"
425  << std::setw (10) << "Reps"
426  << std::setw (10) << "Ticks"
427  << std::setw (12) << "ns/hash"
428  << std::endl;
429 
430  for (auto const & collider : m_hashes)
431  {
432  TimeOne (collider);
433  }
434  } // Time ()
435 
436 private:
437  unsigned long m_nphrases;
438  std::vector <Collider> m_hashes;
439  std::vector <std::string> m_words;
441 }; // class Dictionary
442 
443 
448 {
449 
450 public:
451 
458  bool Add (const std::string file)
459  {
460  if (std::find (m_files.begin (), m_files.end (), file) == m_files.end ())
461  {
462  m_files.push_back (file);
463  }
464 
465  return true;
466  }
467 
469  static std::string GetDefault (void)
470  {
471  return "/usr/share/dict/words";
472  }
473 
479  void ReadInto (Dictionary & dict)
480  {
481  if (m_files.size () == 0)
482  {
483  Add (GetDefault ());
484  }
485 
486  std::cout << "Hashing the dictionar"
487  << (m_files.size () == 1 ? "y" : "ies")
488  << std::endl;
489 
490  for (auto dictFile : m_files)
491  {
492  std::cout << "Dictionary file: " << dictFile << std::endl;
493 
494  // Find collisions
495 
496  // Open the file
497  std::ifstream dictStream;
498  dictStream.open (dictFile.c_str () );
499  if (!dictStream.is_open () )
500  {
501  std::cerr << "Failed to open dictionary file."
502  << "'" << dictFile << "'"
503  << std::endl;
504  continue;
505  }
506 
507  while (dictStream.good () )
508  {
509  std::string phrase;
510  getline (dictStream, phrase);
511  dict.Add (phrase);
512  } // while
513 
514  dictStream.close ();
515 
516  } // for m_files
517 
518  } // ReadInto
519 
520 private:
521  std::vector <std::string> m_files;
523 }; // class DictFiles
524 
525 } // namespace Example
526 
527 } // namespace Hash
528 
529 } // namespace ns3
530 
531 
532 using namespace ns3;
533 using namespace ns3::Hash::Example;
534 
535 int
536 main (int argc, char *argv[])
537 {
538  std::cout << std::endl;
539  std::cout << "Hasher" << std::endl;
540 
541  bool timing = false;
542  DictFiles files;
543 
544  CommandLine cmd (__FILE__);
545  cmd.Usage ("Find hash collisions in the dictionary.");
546  cmd.AddValue ("dict", "Dictionary file to hash",
548  &files),
550 
551  cmd.AddValue ("time", "Run timing test", timing);
552  cmd.Parse (argc, argv);
553 
554  Dictionary dict;
555  dict.Add ( Collider ("FNV1a",
556  Hasher ( Create<Hash::Function::Fnv1a> () ),
558  dict.Add ( Collider ("FNV1a",
559  Hasher ( Create<Hash::Function::Fnv1a> () ),
561 
562  dict.Add ( Collider ("Murmur3",
563  Hasher ( Create<Hash::Function::Murmur3> () ),
565  dict.Add ( Collider ("Murmur3",
566  Hasher ( Create<Hash::Function::Murmur3> () ),
568 
569  files.ReadInto (dict);
570 
571  dict.Report ();
572 
573  if (timing)
574  {
575  dict.Time ();
576  } // if (timing)
577 
578 
579 } // main
Parse command-line arguments.
Definition: command-line.h:229
Keep track of collisions.
collision_t m_coll
The list of collisions.
std::map< uint64_t, std::string > hashdict_t
Hashed dictionary of first instance of each hash.
uint64_t GetHash(const std::string phrase)
Get the appropriate hash value.
std::string m_name
Name of this hash.
void Report() const
Print the collisions found.
Collider(const std::string name, Hasher hash, const enum Bits bits)
Constructor.
Bits
The size of hash function being tested.
@ Bits64
Use 64-bit hash function.
@ Bits32
Use 32-bit hash function.
std::vector< std::pair< uint64_t, std::string > > collision_t
Collision map of subsequent instances.
bool Add(const std::string phrase)
Add a string to the Collider.
enum Bits m_bits
Hash function.
std::string GetName() const
hashdict_t m_dict
The dictionary map, indexed by hash.
Source word list files.
std::vector< std::string > m_files
List of word files to use.
bool Add(const std::string file)
CommandLine callback function to add a file argument to the list.
void ReadInto(Dictionary &dict)
Add phrases from the files into the dict.
static std::string GetDefault(void)
Word list and hashers to test.
void ReportExpectedCollisions() const
Report the expected number of collisions.
void Add(Collider c)
Add a Collider containing a hash function.
std::vector< std::string > m_words
List of unique words.
std::vector< Collider > m_hashes
List of hash Colliders.
void Report() const
Print the collisions for each Collider.
void TimeOne(const Collider &collider)
Time and report the execution of one hash across the entire Dictionary.
unsigned long m_nphrases
Number of strings hashed.
void Add(const std::string phrase)
Add a string to the dictionary.
void Time()
Report the execution time of each hash across the entire Dictionary.
Generic Hash function interface.
Definition: hash.h:88
uint32_t GetHash32(const char *buffer, const std::size_t size)
Compute 32-bit hash of a byte buffer.
Definition: hash.h:239
uint64_t GetHash64(const char *buffer, const std::size_t size)
Compute 64-bit hash of a byte buffer.
Definition: hash.h:247
Hasher & clear(void)
Restore initial state.
Definition: hash.cc:55
#define NS_LOG_COMPONENT_DEFINE(name)
Define a Log component with a specific name.
Definition: log.h:205
std::size_t hash(const BasicJsonType &j)
hash a JSON value
Definition: json.h:4680
Namespace for hasher-example.
Every class exported by the ns3 library is enclosed in the ns3 namespace.
Callback< R, Ts... > MakeCallback(R(T::*memPtr)(Ts...), OBJ objPtr)
Build Callbacks for class method members which take varying numbers of arguments and potentially retu...
Definition: callback.h:1648
cmd
Definition: second.py:35
def start()
Definition: core.py:1853