Advertisement
Guest User

codegolf.stackexchange.com/a/44592

a guest
Jan 16th, 2015
398
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 2.36 KB | None | 0 0
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>
  #include <unistd.h>
  #include <sys/types.h>
  #include <sys/socket.h>
  #include <netinet/in.h>
  #include <arpa/inet.h>
  #include <netdb.h>
  #include <regex.h>
  7.  
  /* Default size, in bytes, of the scratch buffers allocated in this file. */
  #define SZ 1024

  /* Print the message s on stdout with puts() and evaluate to the int 0, so
     that a function returning int can report failure with
     `return ERROR("...");`. */
  #define ERROR(s) (puts(s), 0)

  /* Same as ERROR(), but the result is a null void pointer, for use in
     functions that return a pointer. */
  #define VERROR(s) ((void *)(long)ERROR(s))
  11.  
  /* Prototypes for the helpers defined further down in this file.  Without
     them the calls in main() are implicit declarations, which makes
     fetch_page() look as if it returned int and truncates its FILE pointer on
     platforms where pointers are wider than int. */
  int connect_to(char* w);
  FILE* fetch_page(int s, char* w, char* p);
  void list_matches(FILE* f, char* regx);

  /*
   * Fetch PAGE from SITE over plain HTTP and print every line of the response
   * body that matches REGX.
   *
   * Returns EXIT_SUCCESS when the page was fetched and scanned, EXIT_FAILURE
   * (after printing a message on stdout) when it could not be fetched.
   */
  int main(void)
  {
    char* SITE = "www.stroustrup.com";
    char* PAGE = "C++.html";
    char* REGX = "((http://)?www([./#\\+-]\\w*)+)";
    int status = EXIT_FAILURE;

    /* connect_to() reports failure by returning 0, and fetch_page() rejects a
       0 socket itself, so a single check on the returned stream is enough. */
    int s = connect_to(SITE);
    FILE* f = fetch_page(s, SITE, PAGE);

    if (f) {
      list_matches(f, REGX);
      fclose(f);
      status = EXIT_SUCCESS;
    } else {
      printf("Can't fetch page %s/%s\n", SITE, PAGE);
    }

    /* release the socket on both paths, but never close descriptor 0 */
    if (s > 0)
      close(s);
    return status;
  }
  26.  
  /*
   * Open a TCP connection to port 80 of the host named w.
   *
   * w: host name, resolved with gethostbyname().
   *
   * Returns the connected socket descriptor on success.  On failure a message
   * is printed through ERROR() and 0 is returned (the value fetch_page() tests
   * for); a socket created along the way is closed before returning.
   */
  int connect_to(char* w)
  {
    struct sockaddr_in a;
    memset(&a, 0, sizeof a);
    a.sin_family = AF_INET;
    a.sin_port = htons(80);

    /* look the host up first so that no socket exists yet if this fails */
    struct hostent* h = gethostbyname(w);
    if (!h || !h->h_addr_list[0])
      return ERROR("No Such Host");

    /* only an IPv4 address of the expected size fits into sockaddr_in */
    if (h->h_addrtype != AF_INET || (size_t)h->h_length > sizeof a.sin_addr)
      return ERROR("No Such Host");
    memcpy(&a.sin_addr, h->h_addr_list[0], (size_t)h->h_length);

    int s = socket(AF_INET, SOCK_STREAM, 0);
    if (s < 0)
      return ERROR("can't connect");

    if (connect(s, (struct sockaddr*)&a, sizeof a) < 0) {
      close(s);
      return ERROR("can't connect");
    }
    return s;
  }
  44.  
  /* Size of the scratch buffer used for the request and for each recv() chunk. */
  enum { FETCH_BUF_SIZE = 1024 };

  /* Print msg on stdout, release whatever fetch_page() had acquired so far,
     and return NULL so the caller can write `return fetch_fail(...)`. */
  static FILE* fetch_fail(const char* msg, FILE* f, char* b)
  {
    puts(msg);
    if (f)
      fclose(f);
    free(b);
    return NULL;
  }

  /*
   * Request page p from site w over the already-connected socket s using
   * HTTP/1.0, store the complete response in the cache file /tmp/wcache
   * (overwritten on every call), and return that file positioned at the first
   * byte of the response body.
   *
   * s: connected socket; 0 or a negative value is treated as "no connection".
   *    The socket is left open for the caller to close.
   * w: host name, sent in the Host header.
   * p: path of the page relative to the site root; one leading '/' is
   *    accepted and NULL is treated as the root page.
   *
   * Returns an open stream the caller must fclose(), or NULL after printing
   * one of "Resource Error", "Request Failed", "Invalid Response" or
   * "Bad Status Code" on stdout.  Any status other than 200 is a failure.
   */
  FILE* fetch_page(int s, char*w, char* p)
  {
    if (s <= 0 || !w)
      return fetch_fail("Resource Error", NULL, NULL);
    if (!p)
      p = "";
    if (*p == '/')
      p++;  /* the request line below supplies the leading slash */

    FILE* f = fopen("/tmp/wcache", "w+");
    char* b = malloc(FETCH_BUF_SIZE);
    if (!f || !b)
      return fetch_fail("Resource Error", f, b);

    /* compose the request; refuse to send one that did not fit the buffer */
    int len = snprintf(b, FETCH_BUF_SIZE,
                       "GET /%s HTTP/1.0\r\nHost:%s\r\nAccept:*/*\r\nConnection:close\r\n\r\n",
                       p, w);
    if (len < 0 || len >= FETCH_BUF_SIZE)
      return fetch_fail("Resource Error", f, b);

    /* send() may accept only part of the data, so loop until all of it is out */
    for (int sent = 0; sent < len; ) {
      ssize_t k = send(s, b + sent, (size_t)(len - sent), 0);
      if (k <= 0)
        return fetch_fail("Request Failed", f, b);
      sent += (int)k;
    }

    /* copy the response into the cache file until the peer closes; recv()
       returns -1 on error, so its result must stay signed */
    ssize_t got;
    while ((got = recv(s, b, FETCH_BUF_SIZE, 0)) > 0) {
      if (fwrite(b, 1, (size_t)got, f) != (size_t)got)
        return fetch_fail("Resource Error", f, b);
    }
    if (got < 0 || fseek(f, 0L, SEEK_SET) != 0)
      return fetch_fail("Invalid Response", f, b);

    /* status line: "HTTP/<version> <code> <reason>" */
    if (!fgets(b, FETCH_BUF_SIZE, f) || strncmp(b, "HTTP/", 5) != 0)
      return fetch_fail("Invalid Response", f, b);
    char* code = strchr(b, ' ');
    if (!code || strtol(code + 1, NULL, 10) != 200)
      return fetch_fail("Bad Status Code", f, b);

    /* discard headers up to and including the blank line; whole_line records
       whether the previous fgets() chunk ended a line, so the tail of an
       over-long header is never mistaken for the blank separator */
    int whole_line = strchr(b, '\n') != NULL;
    while (fgets(b, FETCH_BUF_SIZE, f)) {
      if (whole_line && (strcmp(b, "\r\n") == 0 || strcmp(b, "\n") == 0))
        break;
      whole_line = strchr(b, '\n') != NULL;
    }

    free(b);
    return f;
  }
  70.  
  /*
   * Print every line of f that contains a match for regx.
   *
   * f:    stream to scan, read from its current position to end-of-file.
   *       The stream is not closed.
   * regx: POSIX extended regular expression.
   *
   * Each matching line is written to stdout followed by a single newline; the
   * line's own "\n" or "\r\n" terminator is removed before matching so the
   * output contains no blank lines between matches.  If regx does not
   * compile, "invalid regex" is printed and f is not read.
   */
  void list_matches(FILE* f, char* regx)
  {
    regex_t r;
    if (regcomp(&r, regx, REG_NOSUB | REG_EXTENDED)) {
      /* a failed regcomp() leaves nothing for regfree() to release */
      puts("invalid regex");
      return;
    }

    char* line = NULL;  /* allocated and grown by getline() */
    size_t cap = 0;
    ssize_t len;
    while ((len = getline(&line, &cap, f)) > 0) {
      /* drop the line terminator so puts() does not double it */
      while (len > 0 && (line[len - 1] == '\n' || line[len - 1] == '\r'))
        line[--len] = '\0';
      if (regexec(&r, line, 0, NULL, 0) == 0)
        puts(line);
    }

    free(line);
    regfree(&r);
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement