Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /*
- Compile: g++-4.7 -Wall main.cpp -std=c++11 -lrt -O2
- Dictionary are from directly: http://dl.dropboxusercontent.com/u/4076606/words.txt
- http://kassoulet.blogspot.com/2007/12/how-to-clear-disk-cache-in-linux.html
- sync ; sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches'
- 10–19 ms
- */
- #include <iostream>
- #include <fstream>
- #include <unordered_set>
- //#include <string>
- //#include <vector>
- #include <algorithm> // std::lower_bound
- #include<stdio.h>
- #include<stdlib.h>
- #include<time.h>
- #include<string.h>
- #define START clock_gettime(CLOCK_REALTIME, &start);
- #define END(title) clock_gettime(CLOCK_REALTIME, &end);\
- cout << title << "\n" << \
- /*"nano sec: " << diff(start, end) << "\n" << \
- "micro sec: " << (diff(start, end)/1000.0) << "\n" <<*/ \
- "mili sec: " << (diff(start, end)/1000000.0) << "\n\n";
- using namespace std;
- //Interaction with HDD: ~ 38 ms (till)//16 ms
- //11–19 ms
- const int DICT_SIZE = 295068;
- const int MAX_WORD_LENGTH = 30;
- struct str {
- char a[MAX_WORD_LENGTH];
- };
- struct hashing_fn {
- size_t operator()(const struct str &a ) const {
- //unsigned /*long long*/ sum=0, base=1;
- //unsigned len=strlen(a.a);
- //for(unsigned i=0; i<len; ++i, base*=255)
- // sum += base*a.a[i];
- //return sum;
- size_t h = 2166136261;
- for (unsigned char i = 0, len=strlen(a.a); i < len; ++i)
- h = (h * 16777619) ^ a.a[i];
- return h%300000;
- }
- };
- bool operator == (struct str const& lhs, struct str const& rhs) {
- return strcmp(lhs.a,rhs.a);
- }
- bool operator <(struct str const& ms, struct str const i) {
- return (strcmp(ms.a, i.a) >= 0 ? false : true);//return ms.a < i.a;//Only when integers/doubles/bools.
- }
- struct str DICT[DICT_SIZE];
- vector<struct str> DICTV[DICT_SIZE];
- //80 = 13–20 parse,
- void constructDictionary(
- //vector<struct str> &dict) {
- unordered_set<string> &dict) {
- // unordered_set<struct str,struct hashing_fn> &dict) {
- //set<string> &dict) {
- ifstream wordListFile("dictionary.txt");
- //FILE* wordListFile = fopen("dictionary.txt", "r");
- dict.reserve(//658753);
- 299951);
- //306245);
- //351061);
- //string word;
- struct str w;
- // unsigned i=0;
- //unsigned max=0;
- while(
- //fgets(w.a, MAX_WORD_LENGTH, wordListFile) != NULL )
- wordListFile.getline(w.a, MAX_WORD_LENGTH) )
- //getline(wordListFile, word) )///wordListFile >> word )
- //if( strlen(w.a) != 0) {//!word.empty() ) {
- //strcpy (w.a, word.c_str());
- dict.insert(w);
- // DICT[i++] = w;
- //DICT[i++] = w;
- //dict[word] = true;
- //strcpy (dict[i++].a, word.c_str());//18–19 ms
- //if(word.length() > max) max = word.length();//max=29
- //i += word[0];
- //}
- //cout << "max=" << max << endl;
- //fclose(wordListFile);
- //dict.insert(DICT, DICT+DICT_SIZE);
- wordListFile.close();
- /* //Basic optimization:
- dict.reserve(299951);
- ifstream wordListFile("dictionary.txt");
- string word;
- while( getline(wordListFile, word) )
- if( !word.empty() )
- dict.insert(word);
- wordListFile.close();*/
- }
- inline void findArray() {
- for(int i=0; i<DICT_SIZE; ++i) {
- //struct str* b = // auto b =
- lower_bound(DICT, DICT+DICT_SIZE, DICT[i]);
- //printf("%s\n", b->a);
- }
- }
- inline void findSet(unordered_set<struct str,struct hashing_fn> &dict) {
- //struct str::iterator a;// auto a;
- for(int i=0; i<DICT_SIZE; ++i)
- dict.find(DICT[i]);
- }
- inline int64_t diff(struct timespec &start, struct timespec &end) {
- return (end.tv_sec - start.tv_sec)*1000000000 + end.tv_nsec - start.tv_nsec;
- }
- int main () {
- struct timespec start, end;
- ios_base::sync_with_stdio(false);
- //unordered_set<struct str,struct hashing_fn> dict;
- unordered_set<string> dict;
- //set<string> dict;
- //vector<struct str> dict(306245);
- //Stringstream
- //dict.reserve(306245);//With: (189+176+177)/3=180.67
- //Without: (259+238+241)/3=246; (236.67) jeigu su 231
- //load_factor = 0.445977
- //Summary: speed gains: 56 ms; 24 % of program
- //-O3 with: (115+117+133)/3=121.67
- //-O2 -O4 similar results to -O3 flag
- //--------------
- //getline(wordListFile, word) -> (102+99+101)/3=101
- cout << "size = " << dict.size() << "\n"
- << "bucket_count = " << dict.bucket_count() << "\n"
- << "load_factor = " << dict.load_factor() << "\n"
- << "max_load_factor = " << dict.max_load_factor() << "\n\n";
- START
- constructDictionary(dict);
- END("constructDictionary")
- /*START
- findArray();
- END("findArray")
- START
- findSet(dict);
- END("findSet")*/
- cout << "size = " << dict.size() << "\n"
- << "bucket_count = " << dict.bucket_count() << "\n"
- << "load_factor = " << dict.load_factor() << "\n"
- << "max_load_factor = " << dict.max_load_factor() << "\n\n";
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement