Don't like ads? PRO users don't see any ads ;-)
Guest

Bad practice

By: a guest on Jul 20th, 2012  |  syntax: C++  |  size: 3.94 KB  |  hits: 46  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
This paste has a previous version, view the difference. Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. // C++11 only
  2.  
  3. #include <unordered_map>
  4. #include <sys/socket.h>
  5. #include <netinet/in.h>
  6. #include <arpa/inet.h>
  7. #include <sys/types.h>
  8. #include <algorithm>
  9. #include <unistd.h>
  10. #include <netdb.h>
  11. #include <sstream>
  12. #include <cstring>
  13. #include <string>
  14. #include <vector>
  15. #include <cstdio>
  16. #include <queue>
  17. #include <map>
  18. #include <set>
  19. using namespace std;
  20.  
  // Multi-pattern substring matcher built as a trie with failure links
  // (Aho-Corasick style). Each fgrep object is one automaton state; the object
  // the user holds is the root.
  //
  // Usage: construct from a list of needles (which inserts and builds), or
  // default-construct, insert() every needle, then call build() exactly once
  // before match(). Calling match() on a non-empty automaton that has not been
  // built dereferences null failure links.
  struct fgrep{
    // Outgoing trie edges. Child states are heap-allocated and owned by this node.
    std::unordered_map<char,fgrep*> child;
    // (needle id, needle length) for every needle that ends at this state,
    // including those inherited through failure links by build().
    std::vector<std::pair<int,int>> hits;
    // Failure link: state to fall back to on a mismatch. Set by build().
    fgrep* fail=nullptr;

    fgrep()=default;

    // Builds a ready-to-use matcher; needles are numbered 0,1,2,... in list order.
    fgrep(std::initializer_list<std::string> const needles){
      int id=0;
      for (std::string const& s: needles) insert(s,id++);
      build();
    }

    // The destructor deletes the children, so a member-wise copy would lead to a
    // double delete. Copying is therefore disabled.
    fgrep(fgrep const&)=delete;
    fgrep& operator=(fgrep const&)=delete;

    ~fgrep(){
      for (auto const& edge: child) delete edge.second;
    }

    // Adds needle `s` to the trie and tags its final state with id `name`.
    void insert(std::string const& s,int const name=0){
      fgrep* state=this;
      for (char const c: s){
        auto it=state->child.find(c);
        if (it==state->child.end()) it=state->child.emplace(c,new fgrep()).first;
        state=it->second;
      }
      state->hits.emplace_back(name,static_cast<int>(s.size()));
    }

    // Computes failure links breadth-first from this (root) state and merges the
    // hits of each failure target into the state, so match() only has to look at
    // the current state. Intended to be called once, after all insert() calls:
    // calling it again appends the inherited hits a second time.
    void build(){
      std::vector<fgrep*> todo={fail=this};
      for (std::size_t i=0; i<todo.size(); ++i){
        fgrep* const parent=todo[i];
        for (auto const& edge: parent->child){
          char const c=edge.first;
          fgrep* const node=edge.second;

          // Walk the parent's failure chain until a state with an edge on `c`
          // is found or the root is reached.
          fgrep* target=parent->fail;
          while (target!=this and target->child.find(c)==target->child.end()) target=target->fail;

          // Follow that edge, unless it leads back to `node` itself (which
          // happens for the root's direct children).
          auto const next=target->child.find(c);
          if (next!=target->child.end() and next->second!=node) target=next->second;

          node->fail=target;
          // BFS order guarantees target->hits is already complete here.
          node->hits.insert(node->hits.end(),target->hits.begin(),target->hits.end());
          todo.push_back(node);
        }
      }
    }

    // Scans `s` and returns every occurrence of every needle as a
    // (start offset in s, needle id) pair, ordered by offset and then id.
    std::set<std::pair<int,int>> match(std::string const& s) const{
      std::set<std::pair<int,int>> res;
      fgrep const* state=this;
      for (std::size_t i=0; i<s.size(); ++i){
        char const c=s[i];
        auto edge=state->child.find(c);
        while (edge==state->child.end() and state!=this){
          state=state->fail;
          edge=state->child.find(c);
        }
        if (edge!=state->child.end()) state=edge->second;
        for (auto const& hit: state->hits){
          // hit.second is the needle length; i is the index of its last character.
          res.insert(std::make_pair(static_cast<int>(i)-hit.second+1,hit.first));
        }
      }
      return res;
    }
  };
  72.  
  // Fetches `page` from `host` over a plain TCP connection to the "http"
  // service and returns every byte the server sends until it closes the
  // connection.
  //
  // Returns an empty string when the host cannot be resolved, no address can be
  // connected to, or the request cannot be sent. If a read error occurs midway,
  // the bytes received so far are returned.
  //
  // NOTE(review): the request is sent exactly as before ("GET /<page>" with no
  // HTTP version and bare '\n' line endings), which servers treat as a legacy
  // simple request if they accept it at all. Switching to a proper
  // "HTTP/1.0 ... \r\n" request would add response headers to the returned
  // string, so that change is left to the caller's discretion.
  std::string http(std::string const& host,std::string const& page){
    addrinfo hints{};                 // zero-initialised, as getaddrinfo requires
    hints.ai_family=AF_UNSPEC;
    hints.ai_socktype=SOCK_STREAM;

    addrinfo* found=nullptr;
    if (getaddrinfo(host.c_str(),"http",&hints,&found)!=0) return std::string();

    // Try each resolved address in turn; the socket is created with the family
    // and protocol of the address it is about to connect to.
    int sh=-1;
    for (addrinfo const* a=found; a!=nullptr; a=a->ai_next){
      sh=socket(a->ai_family,a->ai_socktype,a->ai_protocol);
      if (sh<0) continue;
      if (connect(sh,a->ai_addr,a->ai_addrlen)==0) break;
      close(sh);
      sh=-1;
    }
    freeaddrinfo(found);
    if (sh<0) return std::string();

    // write() may accept fewer bytes than requested, so loop until all is sent.
    std::string const request="GET /"+page+"\nHost: "+host+"\n\n";
    std::size_t sent=0;
    while (sent<request.size()){
      ssize_t const n=write(sh,request.data()+sent,request.size()-sent);
      if (n<=0){
        close(sh);
        return std::string();
      }
      sent+=static_cast<std::size_t>(n);
    }

    // Accumulate the reply chunk by chunk; read() returns 0 at end of stream
    // and a negative value on error, both of which end the loop.
    std::string res;
    char chunk[4096];
    for (;;){
      ssize_t const n=read(sh,chunk,sizeof chunk);
      if (n<=0) break;
      res.append(chunk,static_cast<std::size_t>(n));
    }

    shutdown(sh,SHUT_RDWR);
    close(sh);
    return res;
  }
  96.  
  // Returns the text content of the markup element that starts at s[i].
  //
  // Scanning begins at index i and tracks three things: whether the cursor is
  // inside a <...> tag, whether it is inside a quoted attribute value, and a
  // nesting depth ('<' adds one; a '/' inside a tag subtracts two, cancelling
  // the '<' that opened the closing tag as well as the element it closes).
  // Characters outside tags are appended to the result, with tabs replaced by
  // spaces. Bytes with the high bit set are skipped entirely. Scanning stops
  // after the first character that leaves the scanner outside every tag with
  // depth zero, or at the end of the string / an embedded NUL.
  //
  // An index that is negative or not inside the string yields an empty result.
  std::string tag(std::string const& s,int i){
    std::string res;
    if (i<0) return res;

    bool in_tag=false;   // between '<' and the matching '>'
    char quote='\0';     // the quote character that is currently open, if any
    int depth=0;         // element nesting level

    for (std::size_t pos=static_cast<std::size_t>(i); pos<s.size() and s[pos]!='\0'; ++pos){
      char const c=s[pos];
      if (static_cast<unsigned char>(c)<128){
        if (quote!='\0'){
          // Inside a quoted value only the matching quote is significant.
          if (c==quote) quote='\0';
        }else if (c=='<'){
          in_tag=true;
          ++depth;
        }else if (c=='>'){
          in_tag=false;
        }else if (in_tag){
          if (c=='\'' or c=='"') quote=c;
          else if (c=='/') depth-=2;
        }else{
          res+=(c=='\t'? ' ':c);
        }
      }
      // The character just handled is always consumed; stop once the scanner is
      // back at top level.
      if (not in_tag and depth==0) break;
    }
    return res;
  }
  119.  
  120. int main(){
  121.   string s=http("www.thestudentroom.co.uk","showthread.php?t=2026840&page=2&p=38622645");
  122.   for (auto i: fgrep{" Ago: ","<a class=\"bigusername\" href=\"member.php?u=","vbmenu_register(\"postmenu_"}.match(s)){
  123.     switch (i.second){
  124.       case 0:{
  125.         stringstream t; for (int j=i.first+6; s[j]!='\n'; t<<s[j++]); string t2=t.str();
  126.         printf("%s\t",t2.c_str());
  127.         break;
  128.       }
  129.       case 1:{
  130.         string const t=tag(s,i.first);
  131.         printf("%s\t",t.c_str());
  132.         break;
  133.       }
  134.       case 2:{
  135.         stringstream t; for (int j=i.first+26; s[j]!='"'; t<<s[j++]); string t2=t.str();
  136.         printf("%s\n",t2.c_str());
  137.         break;
  138.       }
  139.     }
  140.   }
  141. }