Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>
//#include <curl/types.h>
#include <curl/easy.h>
/* File-scope scratch index; kept because other code in this file may refer to it. */
int i;

/*
 * Forward declarations for the helpers defined after main().
 * Each one spells out its full parameter list so that calls are type-checked
 * (an empty "()" list leaves the arguments unspecified before C23).
 */
void append(char * string, char ch);
char * getHtml(char *url);
char * getWords(char * htmlCode);
/* Accumulator for the data libcurl hands to our write callback. */
struct BufferStruct {
    char *buffer;   /* bytes collected so far (NULL until the first chunk arrives) */
    size_t size;    /* number of bytes currently stored in buffer */
};
- // This is the function we pass to LC, which writes the output to a BufferStruct
- static size_t WriteMemoryCallback (void *ptr, size_t size, size_t nmemb, void *data)
- {
- size_t realsize = size * nmemb;
- struct BufferStruct * mem = (struct BufferStruct *) data;
- mem->buffer = realloc(mem->buffer, mem->size + realsize + 1);
- if (mem->buffer == NULL)
- return 0;
- if ( mem->buffer )
- {
- memcpy( &( mem->buffer[ mem->size ] ), ptr, realsize );
- mem->size += realsize;
- mem->buffer[ mem->size ] = 0;
- }
- return realsize;
- }
- int main()
- {
- curl_global_init( CURL_GLOBAL_ALL );
- CURL * myHandle;
- CURLcode result; // We’ll store the result of CURL’s webpage retrieval, for simple error checking.
- myHandle = curl_easy_init ( ) ;
- char * output;
- output = getHtml("http://www.facebook.com");
- char * htmlCode = output;
- getWords(htmlCode);
- while (htmlCode) {
- char * nextLine = strchr(htmlCode, '\n');
- if (nextLine)
- *nextLine = '\0';
- if(strstr(htmlCode,"href")!=NULL) { //href in this line
- char * pos = strstr(htmlCode,"href");
- int position = pos - htmlCode;
- i = position + 6;
- while (i < strlen(htmlCode)) {
- if (htmlCode[i] == '"')
- i = strlen(htmlCode);
- printf("%c",htmlCode[i]);
- i++;
- }
- printf("\n");
- }
- if (nextLine)
- *nextLine = '\n';
- htmlCode = nextLine ? (nextLine+1) : NULL;
- }
- return 0;
- }
/*
 * Appends the single character ch to the NUL-terminated string `string`, in place.
 *
 * The caller must make sure the array behind `string` has room for at least
 * strlen(string) + 2 bytes (the new character plus the terminator).
 */
void append(char * string,char ch)
{
    size_t len = strlen(string);

    /* Write directly into the destination; no temporary copy is needed. */
    string[len] = ch;
    string[len + 1] = '\0';
}
- char * getHtml(char *url){
- struct BufferStruct buffer;
- buffer.buffer = NULL;
- buffer.size = 0;
- CURLcode result;
- CURL * myHandle = curl_easy_init();
- curl_easy_setopt(myHandle, CURLOPT_HEADER, 0);
- curl_easy_setopt(myHandle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback); // Passing the function pointer to LC
- curl_easy_setopt(myHandle, CURLOPT_WRITEDATA, (void *)&buffer); // Passing our BufferStruct to LC
- curl_easy_setopt(myHandle, CURLOPT_URL, url);
- result = curl_easy_perform( myHandle );
- curl_easy_cleanup( myHandle );
- return buffer.buffer;
- }
/*
 * Prints the text of an HTML page that lies outside of tags, starting at the
 * first "</head>" found in htmlCode.
 *
 * Everything between '<' and the matching '>' (brackets included) is skipped.
 * Of a run of consecutive spaces only the last one is printed. The tag depth
 * is reset at every newline, and newlines themselves are not printed.
 *
 * htmlCode - NUL-terminated page text; may be NULL. It is not modified.
 *
 * Always returns NULL: the result is written to stdout, no string is produced.
 */
char * getWords(char * htmlCode) {
    if (htmlCode == NULL)
        return NULL;

    const char *cur = strstr(htmlCode, "</head>");
    if (cur == NULL)
        return NULL;

    int depth = 0;   /* number of currently open '<' on this line */
    for (; *cur != '\0'; cur++) {
        if (*cur == '\n') {
            /* Each line is scanned on its own, so an unclosed tag ends here. */
            depth = 0;
            continue;
        }

        if (*cur == '<')
            depth++;

        /* Outside a tag: print the character unless it is a space that is
         * followed by another space. */
        if (depth < 1 && (*cur != ' ' || cur[1] != ' '))
            putchar(*cur);

        /* Only close a tag that was actually opened; a stray '>' in the text
         * must not push the depth below zero and expose later tags. */
        if (*cur == '>' && depth > 0)
            depth--;
    }

    return NULL;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement