Advertisement
Guest User

Untitled

a guest
Aug 24th, 2019
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.08 KB | None | 0 0
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <string>
#include <utility>
#include <vector>
  4.  
  5. #define BRACKETS 1000000
  6.  
  7. uint32_t cdb_hashadd(uint32_t h, unsigned char c) {
  8. h += (h << 5);
  9. return h ^ c;
  10. }
  11.  
  12. uint32_t cdb_hash(const char *buf, unsigned int len) {
  13. uint32_t h;
  14.  
  15. h = 5381;
  16. while (len) {
  17. h = cdb_hashadd(h, *buf++);
  18. --len;
  19. }
  20. return h;
  21. }
  22.  
  23. int main() {
  24. std::vector<std::string> words;
  25.  
  26. std::ifstream input("../words.txt");
  27. for (std::string line; getline(input, line);) {
  28. words.emplace_back(line);
  29. }
  30.  
  31. printf("Total words: %d\n\n", words.size());
  32.  
  33. std::vector<int> counts(BRACKETS, 0);
  34. for (std::string &word: words) {
  35. uint32_t hash = cdb_hash(word.c_str(), word.size());
  36. counts[hash % BRACKETS]++;
  37. }
  38.  
  39. std::vector<int> bracketMap(1000, 0);
  40. for (int c: counts) {
  41. if (c < bracketMap.size()) bracketMap[c]++;
  42. }
  43.  
  44. for (int i = 1; i < bracketMap.size(); i++) {
  45. if (bracketMap[i] == 0) continue;
  46.  
  47. printf("%d: %d\n", i, bracketMap[i]);
  48. }
  49.  
  50. return 0;
  51. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement