Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <sys/types.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <netinet/in.h>
#include <regex.h>
#include <unistd.h>
// Size, in bytes, of the scratch buffers allocated by this file.
#define SZ 1024

// ERROR(s): print message s (plus a newline) on stdout via puts() and
// evaluate to the int 0, so a function can `return ERROR("...");`.
#define ERROR(s) ((void)puts(s), 0)

// VERROR(s): same as ERROR(s) but evaluates to a null pointer, for use in
// functions that return a pointer. An explicit (void *)0 is used because
// casting a non-constant integer zero is not guaranteed to give a null pointer.
#define VERROR(s) ((void)puts(s), (void *)0)
// Prototypes for the helpers defined below. Without them the calls in main()
// are implicit declarations returning int, which truncates the FILE* that
// fetch_page() returns on platforms where pointers are wider than int.
int connect_to(char *w);
FILE *fetch_page(int s, char *w, char *p);
void list_matches(FILE *f, char *regx);

// Fetches PAGE from SITE over HTTP and prints every line of the response
// body that matches REGX.
// Returns EXIT_SUCCESS when the page was fetched and scanned, EXIT_FAILURE
// otherwise.
int main(void)
{
    char *SITE = "www.stroustrup.com";
    char *PAGE = "C++.html";
    char *REGX = "((http://)?www([./#\\+-]\\w*)+)";

    // connect_to() reports failure by returning 0 (it has already printed why).
    int s = connect_to(SITE);
    FILE *f = s ? fetch_page(s, SITE, PAGE) : NULL;

    if (!f) {
        printf("Can't fetch page %s/%s\n", SITE, PAGE);
        if (s)
            close(s); // do not leak the socket on the failure path
        return EXIT_FAILURE;
    }

    list_matches(f, REGX);
    fclose(f);
    close(s);
    return EXIT_SUCCESS;
}
// Opens a TCP connection to port 80 of host w (IPv4 only).
// Returns the connected socket descriptor; the caller must close() it.
// On failure prints a message through ERROR() and returns 0, with no
// descriptor left open.
// NOTE(review): 0 doubles as the error value, so a process whose stdin is
// closed could receive descriptor 0 and have it read as a failure.
int connect_to(char *w)
{
    // Resolve the host first so a failed lookup never leaves a socket open.
    struct hostent *h = gethostbyname(w);
    struct sockaddr_in a;

    if (!h || h->h_addrtype != AF_INET || !h->h_addr_list[0]
        || h->h_length != (int)sizeof a.sin_addr)
        return ERROR("No Such Host");

    memset(&a, 0, sizeof a); // zero the padding / unused fields
    a.sin_family = AF_INET;
    a.sin_port = htons(80);
    // h_length was checked above, so this copy cannot overrun sin_addr.
    memcpy(&a.sin_addr, h->h_addr_list[0], sizeof a.sin_addr);

    int s = socket(AF_INET, SOCK_STREAM, 0);
    if (s < 0)
        return ERROR("can't connect");

    if (connect(s, (struct sockaddr *)&a, sizeof a) < 0) {
        close(s);
        return ERROR("can't connect");
    }
    return s;
}
- FILE* fetch_page(int s, char*w, char* p)
- {
- // returns open file handle for page p from site w connected to socket s
- // create cache file; allocate buffer; check for errors; compose request
- FILE*f = fopen("/tmp/wcache", "w+");
- size_t n;
- char* b = malloc(SZ);
- if (!s || !f || !b)
- return VERROR("Resource Error");
- sprintf(b, "GET / HTTP/1.0\r\nHost:%s\r\nAccept:*/*\r\nConnection:close\r\n\r\n", w);
- // send request; receive request; write to cache file
- send(s, b, strlen(b), 0);
- while ((n = recv(s, b, SZ, 0)) > 0)
- fwrite(b, 1, n, f);
- n = 0;
- fseek(f, n, SEEK_SET);
- fgets(b,SZ,f); // read cached page, check response is OK
- if (!f || strncmp(b, "HTTP/", 5))
- return VERROR("Invalid Response");
- if (strtok(b, " ") && atoi(strtok(0, " ")) != 200)
- return VERROR("Bad Status Code");
- while (getline(&b, &n, f)>=0 && *b!='\r'); // discard headers upto blank line
- free(b);
- return f;
- }
// Prints (via puts) every remaining line of f that contains a match for the
// POSIX extended regular expression regx.
//
// f:    stream to read from its current position to end of file; it is not
//       closed here. NULL is ignored.
// regx: extended regular expression; if it does not compile, "invalid regex"
//       is printed and f is left unread.
//
// Each line keeps its trailing newline and puts() appends another, so
// matches are separated by a blank line in the output.
void list_matches(FILE *f, char *regx)
{
    regex_t r;
    char *line = NULL; // getline() allocates and grows this as needed
    size_t cap = 0;

    if (!f || !regx)
        return;

    if (regcomp(&r, regx, REG_NOSUB | REG_EXTENDED)) {
        puts("invalid regex");
        return; // nothing was compiled, so regfree() must not be called
    }

    while (getline(&line, &cap, f) > 0) {
        if (regexec(&r, line, 0, NULL, 0) == 0)
            puts(line);
    }

    free(line);
    regfree(&r);
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement