Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>
// Number of hash buckets the word hashes are reduced into (hash % BRACKETS).
// A typed constant instead of a macro: scoped, visible to the debugger, and
// not subject to textual substitution.
constexpr int BRACKETS = 1000000;
/// Mixes one byte into a running 32-bit hash value.
///
/// Computes (h * 33) XOR c, where the multiplication is expressed as
/// h + (h << 5) and wraps modulo 2^32.
///
/// @param h current hash state
/// @param c next input byte
/// @return the updated hash state
uint32_t cdb_hashadd(uint32_t h, unsigned char c) {
    return (h + (h << 5)) ^ c;
}
- uint32_t cdb_hash(const char *buf, unsigned int len) {
- uint32_t h;
- h = 5381;
- while (len) {
- h = cdb_hashadd(h, *buf++);
- --len;
- }
- return h;
- }
- int main() {
- std::vector<std::string> words;
- std::ifstream input("../words.txt");
- for (std::string line; getline(input, line);) {
- words.emplace_back(line);
- }
- printf("Total words: %d\n\n", words.size());
- std::vector<int> counts(BRACKETS, 0);
- for (std::string &word: words) {
- uint32_t hash = cdb_hash(word.c_str(), word.size());
- counts[hash % BRACKETS]++;
- }
- std::vector<int> bracketMap(1000, 0);
- for (int c: counts) {
- if (c < bracketMap.size()) bracketMap[c]++;
- }
- for (int i = 1; i < bracketMap.size(); i++) {
- if (bracketMap[i] == 0) continue;
- printf("%d: %d\n", i, bracketMap[i]);
- }
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement