// C++11 only
#include <unordered_map>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <sys/types.h>
#include <algorithm>
#include <unistd.h>
#include <netdb.h>
#include <sstream>
#include <cstring>
#include <string>
#include <vector>
#include <cstdio>
#include <queue>
#include <map>
#include <set>
using namespace std;
// Multi-pattern substring search: a trie of needles with failure links
// (Aho-Corasick style). Each object is one trie node; the object the caller
// holds is the root.
//
// Usage: insert() every needle, call build() once, then match() any number of
// texts. The initializer_list constructor does the first two steps.
struct fgrep{
    // Outgoing trie edges keyed by the next character. Children are owned by
    // this node and released in the destructor.
    std::unordered_map<char,fgrep*> child;
    // (needle id, needle length) of every needle ending at this node; after
    // build() this also includes the needles ending at the failure target.
    std::vector<std::pair<int,int>> hits;
    // Node to continue from when no edge matches. Set by build(); null until then.
    fgrep* fail=nullptr;

    // Adds needle `s` to the trie; `name` is the id reported by match().
    void insert(std::string const& s,int const name=0){
        fgrep* state=this;
        for (char c: s){
            fgrep*& next=state->child[c];   // value-initialised to nullptr when new
            if (not next) next=new fgrep();
            state=next;
        }
        state->hits.emplace_back(name,static_cast<int>(s.size()));
    }

    // Computes the failure link of every node, breadth first, and merges the
    // hits of each failure target into the node. Call once after the last
    // insert(): a second call appends the inherited hits again (match() still
    // returns the same set, the lists just grow).
    void build(){
        fail=this;
        std::vector<fgrep*> todo(1,this);   // used as a FIFO queue via index i
        for (std::size_t i=0; i<todo.size(); ++i){
            fgrep* const parent=todo[i];
            for (auto const& edge: parent->child){
                char const c=edge.first;
                fgrep* const node=edge.second;
                // Follow the parent's failure chain until a node with an edge
                // on `c` is found, or the root is reached.
                fgrep* f=parent->fail;
                while (f!=this and not f->child.count(c)) f=f->fail;
                auto const it=f->child.find(c);
                // A child of the root must not become its own failure target.
                if (it!=f->child.end() and it->second!=node) f=it->second;
                node->fail=f;
                // f is shallower than node, so its hit list is already complete.
                for (auto const& h: f->hits) node->hits.push_back(h);
                todo.push_back(node);
            }
        }
    }

    // Scans `s` and returns every occurrence as (start index, needle id).
    // If build() has not been called, a missing failure link restarts at the
    // root instead of dereferencing an unset pointer.
    std::set<std::pair<int,int>> match(std::string const& s) const{
        std::set<std::pair<int,int>> res;
        fgrep const* state=this;
        for (std::size_t i=0; i<s.size(); ++i){
            char const c=s[i];
            auto it=state->child.find(c);
            while (state!=this and it==state->child.end()){
                state=state->fail? state->fail: this;
                it=state->child.find(c);
            }
            if (it!=state->child.end()) state=it->second;
            for (auto const& h: state->hits)
                res.insert(std::make_pair(static_cast<int>(i)-h.second+1,h.first));
        }
        return res;
    }

    fgrep(){}

    // Builds a ready-to-use matcher; needle ids are positions in `needles`.
    fgrep(std::initializer_list<std::string> const needles){
        int i=0;
        for (std::string const& s: needles) insert(s,i++);
        build();
    }

    // Nodes own their children through raw pointers and the root's failure
    // link points at itself, so a member-wise copy would double-delete the
    // trie and leave links into another object. Copying is therefore disabled
    // (which also suppresses the implicit move operations).
    fgrep(fgrep const&)=delete;
    fgrep& operator=(fgrep const&)=delete;

    ~fgrep(){
        for (auto const& edge: child) delete edge.second;
    }
};
// Fetches `page` from `host` over plain TCP (service "http") and returns the
// bytes the server sends back until it closes the connection, a read fails,
// or 16 MiB have been received. Returns "" if the host cannot be resolved,
// no address can be connected to, or the request cannot be sent.
//
// The bytes put on the wire are unchanged from the previous implementation:
// "GET /<page>\nHost: <host>\n\n" followed by one NUL byte.
// NOTE(review): the request has no HTTP version and uses bare LF line ends;
// left alone here because changing it changes what the server answers.
std::string http(std::string const& host,std::string const& page){
    // Ask only for addresses an IPv4 TCP socket can connect to.
    addrinfo hints;
    std::memset(&hints,0,sizeof hints);
    hints.ai_family=AF_INET;
    hints.ai_socktype=SOCK_STREAM;
    hints.ai_protocol=IPPROTO_TCP;
    addrinfo* found=nullptr;
    if (getaddrinfo(host.c_str(),"http",&hints,&found)) return std::string();

    // Try each returned address in turn; the list is freed only after the
    // last connect() has finished reading from it.
    int sh=-1;
    for (addrinfo const* ai=found; ai; ai=ai->ai_next){
        sh=socket(ai->ai_family,ai->ai_socktype,ai->ai_protocol);
        if (sh<0) continue;
        if (connect(sh,ai->ai_addr,ai->ai_addrlen)==0) break;
        close(sh);
        sh=-1;
    }
    freeaddrinfo(found);
    if (sh<0) return std::string();

    // Send the whole request, looping because write() may accept only part.
    std::string const request="GET /"+page+"\nHost: "+host+"\n\n";
    char const* out=request.c_str();
    std::size_t left=request.size()+1;   // +1: the terminating NUL is sent too
    while (left){
        ssize_t const sent=write(sh,out,left);
        if (sent<=0){
            close(sh);
            return std::string();
        }
        out+=sent;
        left-=static_cast<std::size_t>(sent);
    }

    // Collect the reply in bounded chunks; never request more than still fits
    // under the limit.
    std::size_t const limit=0x1000000;
    std::string res;
    char chunk[4096];
    while (res.size()<limit){
        std::size_t const room=limit-res.size();
        std::size_t const want=room<sizeof chunk? room: sizeof chunk;
        ssize_t const got=read(sh,chunk,want);
        if (got<=0) break;   // 0: peer closed; <0: error, keep what arrived
        res.append(chunk,static_cast<std::size_t>(got));
    }

    shutdown(sh,SHUT_RDWR);
    close(sh);
    return res;
}
// Returns the text found outside markup, starting at s[i] and continuing until
// every tag opened since the start has been closed (or a NUL / the end of the
// string is reached).
//
// - Characters between '<' and '>' are dropped; single- and double-quoted
//   attribute values may contain '<', '>' and '/' without affecting the scan.
// - Bytes outside the ASCII range are skipped entirely.
// - A tab in the text is emitted as a space.
// - An index that is negative or not inside `s` yields an empty string.
std::string tag(std::string const& s,int i){
    if (i<0 or static_cast<std::size_t>(i)>=s.size()) return std::string();

    enum { IN_TAG=1, IN_SQUOTE=2, IN_DQUOTE=4 };
    std::string res;
    int state=0;   // bit set of the flags above
    int depth=0;   // tags opened minus tags closed since the start
    do {
        char const c=s[i];
        if (static_cast<unsigned char>(c)>=128) continue;   // non-ASCII: ignore

        if (state&IN_SQUOTE){
            if (c=='\'') state&=~IN_SQUOTE;
        } else if (state&IN_DQUOTE){
            if (c=='"') state&=~IN_DQUOTE;
        } else if (c=='<'){
            state|=IN_TAG;
            ++depth;
        } else if (c=='>'){
            state&=~IN_TAG;
        } else if (state&IN_TAG){
            if (c=='\'') state|=IN_SQUOTE;
            else if (c=='"') state|=IN_DQUOTE;
            else if (c=='/'){
                // IN_TAG implies a '<' was seen at or after the start, so
                // i-1 is valid; i+1 is at most s.size(), which reads as NUL.
                if (s[i-1]=='<') depth-=2;        // "</x>": undo this '<' and close one
                else if (s[i+1]=='>') depth-=1;   // "<x/>": self-closing, undo this '<'
                // any other '/' (e.g. in an unquoted attribute) is not structural
            }
        } else {
            res+=(c=='\t'? ' ': c);
        }
    } while (s[++i] and (state or depth));
    return res;
}
// Downloads one forum thread page and prints three kinds of fields in the
// order they occur in the page:
//   - the text after " Ago: " up to the end of that line, followed by a tab;
//   - the text content of each `<a class="bigusername" ...>` element,
//     followed by a tab;
//   - the text after `vbmenu_register("postmenu_` up to the next '"',
//     followed by a newline.
// NOTE(review): these look like post time, user name and post id — confirm
// against the page markup.
int main(){
    std::string const s=http("www.thestudentroom.co.uk","showthread.php?t=2026840&page=2&p=38622645");

    // Needle ids reported by match() are the positions in the list below.
    std::string const ago=" Ago: ";
    std::string const user="<a class=\"bigusername\" href=\"member.php?u=";
    std::string const post="vbmenu_register(\"postmenu_";

    // Text from `from` up to (not including) the next `stop`, or to the end of
    // the page when `stop` never occurs. `from` is always within the page
    // because it is the end of a match found in it.
    auto const until=[&s](std::size_t from,char stop){
        return s.substr(from,s.find(stop,from)-from);
    };

    for (auto const& hit: fgrep{ago,user,post}.match(s)){
        std::size_t const at=static_cast<std::size_t>(hit.first);
        switch (hit.second){
            case 0:{
                std::string const t=until(at+ago.size(),'\n');
                printf("%s\t",t.c_str());
                break;
            }
            case 1:{
                std::string const t=tag(s,hit.first);
                printf("%s\t",t.c_str());
                break;
            }
            case 2:{
                std::string const t=until(at+post.size(),'"');
                printf("%s\n",t.c_str());
                break;
            }
        }
    }
}