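/* testytlinks.c */

/*
 * testy: reads a file of YouTube links line by line and checks, through the
 * YouTube GData feeds, whether each linked video (or every video of a linked
 * playlist) is still available.
 */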
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <curl/curl.h>

/* Growable, NUL-terminated byte buffer that accumulates a libcurl response. */
struct MemoryStruct {
    char *memory;  /* heap buffer holding `size` bytes plus a trailing '\0' */
    size_t size;   /* number of payload bytes stored, not counting the '\0' */
};

/*
 * libcurl write callback: appends the size * nmemb bytes at `contents` to the
 * struct MemoryStruct passed through `userp`, keeping the buffer NUL-terminated.
 *
 * Returns the number of bytes consumed. When the size computation would wrap
 * or the buffer cannot be grown it returns 0 (for a non-empty delivery, a
 * short count is what tells libcurl to abort the transfer) and leaves the
 * existing buffer untouched, so the owner of the struct can still free it.
 */
static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
{
    struct MemoryStruct *mem = (struct MemoryStruct *)userp;
    const size_t max_size = (size_t)-1;
    size_t realsize;
    char *grown;

    /* Refuse deliveries whose byte count cannot be represented in size_t. */
    if (nmemb != 0 && size > max_size / nmemb)
        return 0;
    realsize = size * nmemb;

    /* mem->size + realsize + 1 must not wrap either. */
    if (realsize >= max_size - mem->size)
        return 0;

    /* Grow through a temporary so the old buffer is not lost on failure. */
    grown = realloc(mem->memory, mem->size + realsize + 1);
    if (grown == NULL) {
        /* out of memory! */
        fprintf(stdout, "ERROR: not enough memory (realloc returned NULL)\n");
        fflush(stdout);
        return 0;
    }
    mem->memory = grown;

    memcpy(mem->memory + mem->size, contents, realsize);
    mem->size += realsize;
    mem->memory[mem->size] = '\0';
    return realsize;
}

/*
 * Forward declarations for the checkers defined after main().
 *
 * verify_youtube_video            - requests only the headers for `videoid` and
 *                                   prints the HTTP status it recognises;
 *                                   returns 0, or -1 on failure.
 * verify_youtube_playlist         - walks the playlist `playlistid` in pages of
 *                                   50 entries; returns 0, or -1 on failure.
 * verify_youtube_playlist_request - fetches one playlist page selected by the
 *                                   query string `parameters`, verifies every
 *                                   video found in it and returns how many were
 *                                   processed, or -1 on failure.
 */
int verify_youtube_video(char *videoid);
int verify_youtube_playlist(char *playlistid);
int verify_youtube_playlist_request(char *playlistid, char *parameters);

/*
 * Returns a pointer to the first character after the first marker (tried in
 * array order) that occurs in `line`, or NULL when none of the markers occurs.
 */
static const char *find_id_start(const char *line, const char *const *markers, size_t count)
{
    size_t m;

    for (m = 0; m < count; m++) {
        const char *hit = strstr(line, markers[m]);
        if (hit != NULL)
            return hit + strlen(markers[m]);
    }
    return NULL;
}

/*
 * Copies the ID that starts at `src` into `dst`, taking at most max_len
 * characters and stopping at the first character that cannot belong to an ID
 * (end of string, CR/LF, '&', '?', ...). The rest of dst[0..max_len] is
 * zero-filled, so `dst` must provide max_len + 1 bytes.
 *
 * NOTE(review): assumes YouTube IDs consist only of letters, digits, '-' and
 * '_' - confirm if other ID formats must be supported.
 */
static void copy_id(char *dst, const char *src, size_t max_len)
{
    static const char id_chars[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "abcdefghijklmnopqrstuvwxyz"
        "0123456789-_";
    size_t n = strspn(src, id_chars);

    if (n > max_len)
        n = max_len;
    memcpy(dst, src, n);
    memset(dst + n, 0, max_len + 1 - n);
}

/*
 * Entry point: argv[1] names a text file with one YouTube link per line.
 * Playlist links ("view_play_list?p=" or "/p/") have every contained video
 * verified; video links ("watch?v=", "/v/" or "youtu.be/") are verified
 * directly; any other line is skipped. Returns 1 on a usage or file error,
 * otherwise 0.
 */
int main(int argc, char *argv[]) {
    /* Markers are tried in this order; playlists take precedence over videos. */
    static const char *const playlist_markers[] = { "view_play_list?p=", "/p/" };
    static const char *const video_markers[] = { "watch?v=", "/v/", "youtu.be/" };
    enum { PLAYLIST_ID_MAX = 18, VIDEO_ID_LEN = 11 };

    FILE *youtube_links;
    char line[1024];
    char id[PLAYLIST_ID_MAX + 1]; // holds a playlist id (up to 18 chars) or a video id (11 chars)
    const char *id_start;

    if (argc != 2) {
        fprintf(stdout, "USAGE: %s filepath\n", argv[0]);
        fflush(stdout);
        return 1;
    }

    youtube_links = fopen(argv[1], "rb");
    if (youtube_links == NULL) {
        fprintf(stdout, "ERROR: cannot open a file with youtube links\n");
        fflush(stdout);
        return 1;
    }

    // read the file with youtube links, line by line
    while (fgets(line, sizeof line, youtube_links) != NULL) {
        id_start = find_id_start(line, playlist_markers,
                                 sizeof playlist_markers / sizeof playlist_markers[0]);
        if (id_start != NULL) {
            // playlist link: verify all the videos inside the playlist
            copy_id(id, id_start, PLAYLIST_ID_MAX);
            fprintf(stdout, "PLAYLIST - %s\n", id);
            verify_youtube_playlist(id);
        } else {
            id_start = find_id_start(line, video_markers,
                                     sizeof video_markers / sizeof video_markers[0]);
            if (id_start != NULL) {
                // video link: check if a video with such id is available
                copy_id(id, id_start, VIDEO_ID_LEN);
                fprintf(stdout, "VIDEO    :      ");
                verify_youtube_video(id);
            }
            /*
             * Lines without any known marker are skipped. Treating such a
             * line as a bare video id would also be possible, but that case
             * is not needed for SiteSucker.
             */
        }
        sleep(1); // SLEEP(1), OR USLEEP(1000000), IS NECESSARY HERE FOR CORRECT RESULTS
    }

    fclose(youtube_links);
    return 0;
}

/*
 * Verifies every video of the playlist `playlistid`.
 *
 * Youtube does not return info about more than 50 videos per playlist
 * request, so the playlist is fetched page by page with the query strings
 * "?v=2&max-results=50&start-index=1", "...&start-index=51", ... until a page
 * yields fewer than 50 videos.
 *
 * Returns 0 when the whole playlist was walked, -1 when a page request failed.
 */
int verify_youtube_playlist(char *playlistid) {
    enum { PAGE_SIZE = 50 };
    char parameters[64]; // "?v=2&max-results=50&start-index=" plus any int fits easily
    int start_index = 1; // 1-based index of the first entry of the next page
    int ret;             // number of videos processed by the last page request

    do {
        snprintf(parameters, sizeof parameters,
                 "?v=2&max-results=%d&start-index=%d", PAGE_SIZE, start_index);
        ret = verify_youtube_playlist_request(playlistid, parameters);
        if (ret < 0)
            return -1; // do not report a failed request as success
        start_index += PAGE_SIZE;
    } while (ret >= PAGE_SIZE); // a short page means there is nothing left to fetch

    return 0;
}

int verify_youtube_playlist_request(char *playlistid, char *parameters) {

    int counter=0; // counter will be returning the number of videos
                   // that have been processed during playlist analysis

    // CURL standard section 1 : START

    CURL *curl_handle;
    CURLcode res;

    struct MemoryStruct chunk;

    chunk.memory = malloc(1);  /* will be grown as needed by the realloc above */
    chunk.size = 0;            /* no data at this point */

    curl_global_init(CURL_GLOBAL_ALL);

    curl_handle = curl_easy_init(); /* init the curl session */

    // CURL standard section 1 : END

    const char *gdata = "http://gdata.youtube.com/feeds/api/playlists/";
    size_t gdata_len = strlen(gdata);

    size_t playlistid_len = strlen(playlistid); // playlist id length can be 16 or 18

    size_t parameters_len = strlen(parameters);

    char *curl_url = malloc(gdata_len + playlistid_len + parameters_len + 1);
    if (!curl_url) return -1;

    memcpy(curl_url, gdata, gdata_len);
    memcpy(curl_url + gdata_len, playlistid, playlistid_len);
    memcpy(curl_url + gdata_len + playlistid_len, parameters, parameters_len);

    curl_url[gdata_len + playlistid_len + parameters_len] = '\0';

    // CURL standard section 2 : START

    /* specify URL to get */
    curl_easy_setopt(curl_handle, CURLOPT_URL, curl_url);

    /* send all data to this function  */
    curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);

    /* we pass our 'chunk' struct to the callback function */
    curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);

    /* some servers don't like requests that are made without a user-agent field, so we provide one */
    curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");

    /* get it! */
    res = curl_easy_perform(curl_handle);

    /* check for errors */
    if(res != CURLE_OK) {
        fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(res));
        return -1;
    }
    // CURL standard section 2 : END
    else {
        /*
         * Now, our chunk.memory points to a memory block that is chunk.size
         * bytes big and contains the remote file.
         *
         * Do something nice with it!
         */

         sleep(1); // we need this extra sleep before requesting for 1st video
                   // because we have just sent a request for playlist,
                   // - and there should be 1 second delay between requests

         char *search_pointer = chunk.memory;
         char *check_result = search_pointer;
         char videoid[12];
         int i;

         while (check_result!=NULL) {
             check_result = strstr(search_pointer, "url='http://www.youtube.com/watch?v=");
             if (check_result!=NULL) {
                 search_pointer=check_result+(36+11)*sizeof(char);
                 for (i=0; i<11; i++) {
                     videoid[i]=check_result[i+36];
                 }
                 videoid[11]='\0';
                 counter=counter+1;
                 fprintf(stdout, "        VIDEO : ");
                 verify_youtube_video(videoid);
                 sleep(1); // SLEEP(1), OR USLEEP(1000000), IS NECESSARY HERE FOR CORRECT RESULTS
             }
         }
    }

    /* cleanup curl stuff */
    curl_easy_cleanup(curl_handle);

    if(chunk.memory)
        free(chunk.memory);

    /* we're done with libcurl, so clean it up */
    curl_global_cleanup();

    return counter;
}

int verify_youtube_video(char *videoid)
{
    // CURL standard section 1 : START

    CURL *curl_handle;
    CURLcode res;

    struct MemoryStruct chunk;

    chunk.memory = malloc(1);  /* will be grown as needed by the realloc above */
    chunk.size = 0;            /* no data at this point */

    curl_global_init(CURL_GLOBAL_ALL);

    curl_handle = curl_easy_init(); /* init the curl session */

    // CURL standard section 1 : END

    curl_easy_setopt(curl_handle, CURLOPT_HEADER, 1); // HERE, WE NEED A HEADER ONLY,
    curl_easy_setopt(curl_handle, CURLOPT_NOBODY, 1); // NOTHING ELSE

    const char *gdata = "http://gdata.youtube.com/feeds/api/videos/";
    size_t gdata_len = strlen(gdata);

    //size_t videoid_len = strlen(videoid);
    size_t videoid_len = 11; // videoid length is always 11

    char *curl_url = malloc(gdata_len + videoid_len + 1);
    if (!curl_url) return -1;

    memcpy(curl_url, gdata, gdata_len);
    memcpy(curl_url + gdata_len, videoid, videoid_len);
    curl_url[gdata_len + videoid_len] = '\0';

    // CURL standard section 2 : START

    /* specify URL to get */
    curl_easy_setopt(curl_handle, CURLOPT_URL, curl_url);

    /* send all data to this function  */
    curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);

    /* we pass our 'chunk' struct to the callback function */
    curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);

    /* some servers don't like requests that are made without a user-agent field, so we provide one */
    curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");

    /* get it! */
    res = curl_easy_perform(curl_handle);

    /* check for errors */
    if(res != CURLE_OK) {
        fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(res));
        return -1;
    }
    // CURL standard section 2 : END
    else {
        /*
         * Now, our chunk.memory points to a memory block that is chunk.size
         * bytes big and contains the remote file.
         *
         * Do something nice with it!
         */

         char *check_result;

         check_result = strstr(chunk.memory, "200 OK");
         if (check_result!=NULL) {
             fprintf(stdout, "%s - 200 OK\n", videoid);
                 fflush(stdout);
         }

         check_result = strstr(chunk.memory, "400 Bad Request");
         if (check_result!=NULL) {
             fprintf(stdout, "%s - 400 Bad Request\n", videoid);
                 fflush(stdout);
         }

         check_result = strstr(chunk.memory, "403 Forbidden");
         if (check_result!=NULL) {
             fprintf(stdout, "%s - 403 Forbidden\n", videoid);
                 fflush(stdout);
          }

          check_result = strstr(chunk.memory, "404 Not Found");
          if (check_result!=NULL) {
              fprintf(stdout, "%s - 404 Not Found\n", videoid);
                  fflush(stdout);
          }
    }

    /* cleanup curl stuff */
    curl_easy_cleanup(curl_handle);

    if(chunk.memory)
        free(chunk.memory);

    /* we're done with libcurl, so clean it up */
    curl_global_cleanup();

    return 0;
}