Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <fstream>
- #include <vector>
- #define BRACKETS 1000003
- uint32_t cdb_hashadd(uint32_t h, unsigned char c) {
- h += (h << 5);
- return h ^ c;
- }
- uint32_t cdb_hash(const unsigned char *buf, int len) {
- uint32_t h;
- h = 5381;
- while (len) {
- h = cdb_hashadd(h, *buf++);
- --len;
- }
- return h;
- }
- uint32_t fnv_hash_1a_32(const unsigned char *p, int len) {
- unsigned h = 0x811c9dc5;
- int i;
- for (i = 0; i < len; i++)
- h = (h ^ p[i]) * 0x01000193;
- return h;
- }
- uint32_t djb2(const unsigned char *p, int len)
- {
- unsigned long hash = 5381;
- for (int i = 0; i < len; i++)
- hash = ((hash << 5) + hash) + *p++; /* hash * 33 + c */
- return hash;
- }
- uint32_t djb2a(const unsigned char *p, int len)
- {
- unsigned long hash = 5381;
- for (int i = 0; i < len; i++)
- hash = ((hash << 5) + hash) ^ (*p++); /* hash * 33 ^ c */
- return hash;
- }
- uint32_t murmur3_32(const uint8_t* key, int len)
- {
- uint32_t h = 0x12345678;
- if (len > 3) {
- size_t i = len >> 2;
- do {
- uint32_t k;
- memcpy(&k, key, sizeof(uint32_t));
- key += sizeof(uint32_t);
- k *= 0xcc9e2d51;
- k = (k << 15) | (k >> 17);
- k *= 0x1b873593;
- h ^= k;
- h = (h << 13) | (h >> 19);
- h = h * 5 + 0xe6546b64;
- } while (--i);
- }
- if (len & 3) {
- size_t i = len & 3;
- uint32_t k = 0;
- do {
- k <<= 8;
- k |= key[i - 1];
- } while (--i);
- k *= 0xcc9e2d51;
- k = (k << 15) | (k >> 17);
- k *= 0x1b873593;
- h ^= k;
- }
- h ^= len;
- h ^= h >> 16;
- h *= 0x85ebca6b;
- h ^= h >> 13;
- h *= 0xc2b2ae35;
- h ^= h >> 16;
- return h;
- }
- void testhash(const char *name, std::vector<std::string> &words, uint32_t func(const unsigned char *p, int len)) {
- std::vector<int> counts(BRACKETS, 0);
- for (std::string &word: words) {
- uint32_t hash = func(reinterpret_cast<const unsigned char *>(word.c_str()), word.size());
- counts[hash % BRACKETS]++;
- }
- std::vector<int> bracketMap(1000, 0);
- for (int c: counts) {
- if (c < bracketMap.size()) bracketMap[c]++;
- }
- printf("%s:\n", name);
- for (int i = 1; i < bracketMap.size(); i++) {
- if (bracketMap[i] == 0) continue;
- printf("%d: %d\n", i, bracketMap[i]);
- }
- printf("\n");
- }
- int main() {
- std::vector<std::string> words;
- std::ifstream input("../words.txt");
- for (std::string line; getline(input, line);) {
- words.emplace_back(line);
- }
- // words.resize(10000);
- printf("Total words: %d\nBrackets: %d\n\n", words.size(), BRACKETS);
- testhash("cdb_hash", words, cdb_hash);
- testhash("fnv_hash_1a_32", words, fnv_hash_1a_32);
- testhash("djb2", words, djb2);
- testhash("djb2a", words, djb2a);
- testhash("murmur3_32", words, murmur3_32);
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement