Advertisement
kestutis

Read dictionary from file

Oct 10th, 2013
372
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 4.62 KB | None | 0 0
  1. /*
  2. Compile: g++-4.7 -Wall main.cpp -std=c++11 -lrt -O2
  3.  
  4. Dictionary are from directly: http://dl.dropboxusercontent.com/u/4076606/words.txt
  5.  
  6. http://kassoulet.blogspot.com/2007/12/how-to-clear-disk-cache-in-linux.html
  7. sync ; sudo sh -c 'echo 3 > /proc/sys/vm/drop_caches'
  8.  
  9. 10–19 ms
  10. */
  11.  
  12. #include <iostream>
  13. #include <fstream>
  14. #include <unordered_set>
  15. //#include <string>
  16. //#include <vector>
  17. #include <algorithm>    // std::lower_bound
  18.  
  19. #include<stdio.h>
  20. #include<stdlib.h>
  21. #include<time.h>
  22. #include<string.h>
  23.  
  24. #define START clock_gettime(CLOCK_REALTIME, &start);
  25. #define END(title)  clock_gettime(CLOCK_REALTIME, &end);\
  26.     cout << title << "\n" << \
  27.         /*"nano  sec: " << diff(start, end) << "\n" << \
  28.         "micro sec: " << (diff(start, end)/1000.0) << "\n" <<*/ \
  29.         "mili  sec: " << (diff(start, end)/1000000.0) << "\n\n";
  30.  
  31. using namespace std;
  32.  
  33. //Interaction with HDD: ~ 38 ms (till)//16 ms
  34.  
  35. //11–19 ms
  36. const int DICT_SIZE = 295068;
  37. const int MAX_WORD_LENGTH = 30;
  38. struct str {
  39.     char a[MAX_WORD_LENGTH];
  40. };
  41. struct hashing_fn {
  42.     size_t operator()(const struct str &a ) const {
  43.         //unsigned /*long long*/ sum=0, base=1;
  44.         //unsigned len=strlen(a.a);
  45.         //for(unsigned i=0; i<len; ++i, base*=255)
  46.         //  sum += base*a.a[i];
  47.         //return sum;
  48.         size_t h = 2166136261;
  49.         for (unsigned char i = 0, len=strlen(a.a); i < len; ++i)
  50.             h = (h * 16777619) ^ a.a[i];
  51.         return h%300000;
  52.     }
  53. };
  54.  
  55. bool operator == (struct str const& lhs, struct str const& rhs) {
  56.     return strcmp(lhs.a,rhs.a);
  57. }
  58. bool operator <(struct str const& ms, struct str const i) {
  59.     return (strcmp(ms.a, i.a) >= 0 ? false : true);//return ms.a < i.a;//Only when integers/doubles/bools.
  60. }
  61.  
  62. struct str DICT[DICT_SIZE];
  63. vector<struct str> DICTV[DICT_SIZE];
  64. //80 = 13–20 parse,
  65. void constructDictionary(
  66. //vector<struct str> &dict) {
  67.     unordered_set<string> &dict) {
  68. //  unordered_set<struct str,struct hashing_fn> &dict) {
  69.     //set<string> &dict) {
  70.  
  71.     ifstream wordListFile("dictionary.txt");
  72.     //FILE* wordListFile = fopen("dictionary.txt", "r");
  73.     dict.reserve(//658753);
  74.     299951);
  75.     //306245);
  76.     //351061);
  77.     //string word;
  78.     struct str w;
  79. //  unsigned i=0;
  80.     //unsigned max=0;
  81.     while(
  82.         //fgets(w.a, MAX_WORD_LENGTH, wordListFile) != NULL )
  83.         wordListFile.getline(w.a, MAX_WORD_LENGTH) )
  84.         //getline(wordListFile, word) )///wordListFile >> word )
  85.         //if( strlen(w.a) != 0) {//!word.empty() ) {
  86.             //strcpy (w.a, word.c_str());
  87.             dict.insert(w);
  88. //          DICT[i++] = w;
  89.             //DICT[i++] = w;
  90.             //dict[word] = true;
  91.             //strcpy (dict[i++].a, word.c_str());//18–19 ms
  92.             //if(word.length() > max) max = word.length();//max=29
  93.             //i += word[0];
  94.         //}
  95.     //cout << "max=" << max << endl;
  96.     //fclose(wordListFile);
  97.     //dict.insert(DICT, DICT+DICT_SIZE);
  98.     wordListFile.close();
  99.     /* //Basic optimization:
  100.     dict.reserve(299951);
  101.     ifstream wordListFile("dictionary.txt");
  102.     string word;
  103.     while( getline(wordListFile, word) )
  104.         if( !word.empty() )
  105.             dict.insert(word);
  106.  
  107.     wordListFile.close();*/
  108. }
  109.  
  110. inline void findArray() {
  111.     for(int i=0; i<DICT_SIZE; ++i) {
  112.         //struct str* b = // auto b =
  113.         lower_bound(DICT, DICT+DICT_SIZE, DICT[i]);
  114.         //printf("%s\n", b->a);
  115.     }
  116. }
  117.  
  118. inline void findSet(unordered_set<struct str,struct hashing_fn> &dict) {
  119.     //struct str::iterator a;// auto a;
  120.     for(int i=0; i<DICT_SIZE; ++i)
  121.         dict.find(DICT[i]);
  122. }
  123.  
  124.  
  125. inline int64_t diff(struct timespec &start, struct timespec &end) {
  126.     return (end.tv_sec - start.tv_sec)*1000000000 + end.tv_nsec - start.tv_nsec;
  127. }
  128. int main () {
  129.     struct timespec start, end;
  130.  
  131.     ios_base::sync_with_stdio(false);
  132.     //unordered_set<struct str,struct hashing_fn> dict;
  133.     unordered_set<string> dict;
  134.     //set<string> dict;
  135.     //vector<struct str> dict(306245);
  136.  
  137.     //Stringstream
  138.  
  139.     //dict.reserve(306245);//With: (189+176+177)/3=180.67
  140.     //Without: (259+238+241)/3=246; (236.67) jeigu su 231
  141.     //load_factor = 0.445977
  142.     //Summary: speed gains: 56 ms; 24 % of program
  143.  
  144.     //-O3 with: (115+117+133)/3=121.67
  145.     //-O2 -O4 similar results to -O3 flag
  146.  
  147.     //--------------
  148.     //getline(wordListFile, word) -> (102+99+101)/3=101
  149.  
  150.     cout << "size = " << dict.size() << "\n"
  151.         << "bucket_count = " << dict.bucket_count() << "\n"
  152.         << "load_factor = " << dict.load_factor() << "\n"
  153.         << "max_load_factor = " << dict.max_load_factor() << "\n\n";
  154.  
  155.     START
  156.     constructDictionary(dict);
  157.     END("constructDictionary")
  158.  
  159.     /*START
  160.     findArray();
  161.     END("findArray")
  162.  
  163.     START
  164.     findSet(dict);
  165.     END("findSet")*/
  166.  
  167.     cout << "size = " << dict.size() << "\n"
  168.         << "bucket_count = " << dict.bucket_count() << "\n"
  169.         << "load_factor = " << dict.load_factor() << "\n"
  170.         << "max_load_factor = " << dict.max_load_factor() << "\n\n";
  171.     return 0;
  172. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement