Advertisement
MrRockchip

testytlinks.c

Oct 26th, 2014 (edited)
117
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 12.48 KB | None | 0 0
  1. /* testy */
  2. #include <stdio.h>
  3. #include <stdlib.h>
  4. #include <string.h>
  5. #include <unistd.h>
  6. #include <curl/curl.h>
  7.  
  8. struct MemoryStruct {
  9.     char *memory;
  10.     size_t size;
  11. };
  12.  
  13. static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp)
  14. {
  15.     size_t realsize = size * nmemb;
  16.     struct MemoryStruct *mem = (struct MemoryStruct *)userp;
  17.     mem->memory = realloc(mem->memory, mem->size + realsize + 1);
  18.     if(mem->memory == NULL) {
  19.         /* out of memory! */
  20.         fprintf(stdout, "ERROR: not enough memory (realloc returned NULL)\n");
  21.             fflush(stdout);
  22.         return 1;
  23.     }
  24.     memcpy(&(mem->memory[mem->size]), contents, realsize);
  25.     mem->size += realsize;
  26.     mem->memory[mem->size] = 0;
  27.     return realsize;
  28. }
  29.  
  30. int verify_youtube_video(char *videoid);
  31. int verify_youtube_playlist(char *playlistid);
  32. int verify_youtube_playlist_request(char *playlistid, char *parameters);
  33.  
  34. int main(int argc, char *argv[]) {
  35.     FILE * youtube_links;
  36.     char * link_type;
  37.     char line [1024];
  38.     char videoid[18+1]; // also, a place to store playlist id
  39.     int i=0;
  40.     int skip_flag;
  41.  
  42.     if ( argc != 2 ) {
  43.         fprintf(stdout, "USAGE: %s filepath\n", argv[0]);
  44.             fflush(stdout);
  45.         return 1;
  46.     }
  47.  
  48.     youtube_links = fopen(argv[1], "rb");
  49.     if (youtube_links == NULL) {
  50.         fprintf(stdout, "ERROR: cannot open a file with youtube links\n");
  51.             fflush(stdout);
  52.         return 1;
  53.     }
  54.  
  55.     while ( fgets ( line, sizeof line, youtube_links ) != NULL ) // read a file with youtube links, line by line
  56.     {
  57.         link_type = strstr(line, "view_play_list?p=");
  58.         if (link_type!=NULL) { // if it is a link to youtube playlist, then verify all the videos inside a playlist
  59.                 for (i=17; i<17+18; i++) videoid[i-17]=link_type[i];
  60.                 videoid[18]='\0'; // playlist ID could be 18 or 16 characters long
  61.                 if (videoid[16]=='\n') videoid[16]='\0'; // if playlist ID is 16 characters long
  62.                 fprintf(stdout, "PLAYLIST - %s\n", videoid);
  63.                 verify_youtube_playlist(videoid);
  64.         }
  65.         else {
  66.             link_type = strstr(line, "/p/");
  67.             if (link_type!=NULL) { // if it is a link to youtube playlist, then verify all the videos inside a playlist
  68.                 for (i=3; i<3+18; i++) videoid[i-3]=link_type[i];
  69.                 videoid[18]='\0'; // playlist ID could be 18 or 16 characters long
  70.                 if (videoid[16]=='\n') videoid[16]='\0'; // if playlist ID is 16 characters long
  71.                 fprintf(stdout, "PLAYLIST - %s\n", videoid);
  72.                 verify_youtube_playlist(videoid);
  73.             }
  74.             else {
  75.                 skip_flag=0;
  76.                 link_type = strstr(line, "watch?v=");
  77.                 if (link_type!=NULL) for (i=8; i<8+11; i++) videoid[i-8]=link_type[i]; // for standard video link
  78.                 else {
  79.                     link_type = strstr(line, "/v/");
  80.                     if (link_type!=NULL) for (i=3; i<3+11; i++) videoid[i-3]=link_type[i]; // for shorter video link
  81.                     else {
  82.                       link_type = strstr(line, "youtu.be/");
  83.                       if (link_type!=NULL) for (i=9; i<9+11; i++) videoid[i-9]=link_type[i]; // for standard video link
  84.                       else skip_flag=1;
  85.                       /*
  86.                        * Or, instead of using skip_flag, we could assume that each line is just a video id:
  87.                        *
  88.                        * else for (i=0; i<11; i++) videoid[i]=line[i];
  89.                        *
  90.                        * But this case would not be needed for SiteSucker.
  91.                        *
  92.                        */
  93.                     }                                          
  94.                 }
  95.                 if (skip_flag!=1) {
  96.                     videoid[11]='\0'; // videoid cannot be longer than 11 characters
  97.                     fprintf(stdout, "VIDEO    :      ");
  98.                     verify_youtube_video(videoid); // check if a video with such videoid is available
  99.                 }
  100.             }
  101.         }
  102.         sleep(1); // SLEEP(1), OR USLEEP(1000000), IS NECESSARY HERE FOR CORRECT RESULTS
  103.     }
  104.  
  105.     fclose(youtube_links);
  106.     return 0;
  107. }
  108.  
  109. int verify_youtube_playlist(char *playlistid) {
  110.     int start_index=1; // stage of playlist analysis
  111.     char start_index_str[8]; // place to store start_index
  112.     size_t start_index_strlen;
  113.     int ret=1; // how many videos have been processed during a stage of playlist analysis
  114.     char * parameters; // full parameters for a stage of playlist analysis
  115.  
  116.     const char * defaults = "?v=2&max-results=50&start-index="; // default parameters
  117.     size_t defaults_len = strlen(defaults);
  118.  
  119.     /*
  120.      * Youtube does not allow to get info about more than max_results=50 videos (for each playlist request)
  121.      * So, we have to make many requests, until we would run out of videos for that playlist.
  122.      * Parameters request should be like "?v=2&max-results=50&start-index=1", "?v=2&max-results=50&start-index=51", ...
  123.      */
  124.  
  125.     while (ret>0) {
  126.         sprintf(start_index_str, "%d", start_index);
  127.         start_index_strlen=strlen(start_index_str);
  128.         parameters=malloc(defaults_len+start_index_strlen+1);
  129.         if (!parameters) return -1;
  130.         memcpy(parameters, defaults, defaults_len);
  131.         memcpy(parameters + defaults_len, start_index_str, start_index_strlen);
  132.         parameters[defaults_len + start_index_strlen] = '\0';
  133.         ret=verify_youtube_playlist_request(playlistid, parameters);
  134.         start_index=start_index+50;
  135.         if (ret<50) ret=0; // if less than 50 videos were processed, there is no need for a new request, so lets exit
  136.     }
  137.  
  138.     return 0;
  139. }
  140.  
  141. int verify_youtube_playlist_request(char *playlistid, char *parameters) {
  142.  
  143.     int counter=0; // counter will be returning the number of videos
  144.                    // that have been processed during playlist analysis
  145.  
  146.     // CURL standard section 1 : START
  147.  
  148.     CURL *curl_handle;
  149.     CURLcode res;
  150.  
  151.     struct MemoryStruct chunk;
  152.  
  153.     chunk.memory = malloc(1);  /* will be grown as needed by the realloc above */
  154.     chunk.size = 0;            /* no data at this point */
  155.  
  156.     curl_global_init(CURL_GLOBAL_ALL);
  157.  
  158.     curl_handle = curl_easy_init(); /* init the curl session */
  159.  
  160.     // CURL standard section 1 : END
  161.  
  162.     const char *gdata = "http://gdata.youtube.com/feeds/api/playlists/";
  163.     size_t gdata_len = strlen(gdata);
  164.  
  165.     size_t playlistid_len = strlen(playlistid); // playlist id length can be 16 or 18
  166.  
  167.     size_t parameters_len = strlen(parameters);
  168.  
  169.     char *curl_url = malloc(gdata_len + playlistid_len + parameters_len + 1);
  170.     if (!curl_url) return -1;
  171.  
  172.     memcpy(curl_url, gdata, gdata_len);
  173.     memcpy(curl_url + gdata_len, playlistid, playlistid_len);
  174.     memcpy(curl_url + gdata_len + playlistid_len, parameters, parameters_len);
  175.  
  176.     curl_url[gdata_len + playlistid_len + parameters_len] = '\0';
  177.  
  178.     // CURL standard section 2 : START
  179.  
  180.     /* specify URL to get */
  181.     curl_easy_setopt(curl_handle, CURLOPT_URL, curl_url);
  182.  
  183.     /* send all data to this function  */
  184.     curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
  185.  
  186.     /* we pass our 'chunk' struct to the callback function */
  187.     curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
  188.  
  189.     /* some servers don't like requests that are made without a user-agent field, so we provide one */
  190.     curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");
  191.  
  192.     /* get it! */
  193.     res = curl_easy_perform(curl_handle);
  194.  
  195.     /* check for errors */
  196.     if(res != CURLE_OK) {
  197.         fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(res));
  198.         return -1;
  199.     }
  200.     // CURL standard section 2 : END
  201.     else {
  202.         /*
  203.          * Now, our chunk.memory points to a memory block that is chunk.size
  204.          * bytes big and contains the remote file.
  205.          *
  206.          * Do something nice with it!
  207.          */
  208.  
  209.          sleep(1); // we need this extra sleep before requesting for 1st video
  210.                    // because we have just sent a request for playlist,
  211.                    // - and there should be 1 second delay between requests
  212.  
  213.          char *search_pointer = chunk.memory;
  214.          char *check_result = search_pointer;
  215.          char videoid[12];
  216.          int i;
  217.  
  218.          while (check_result!=NULL) {
  219.              check_result = strstr(search_pointer, "url='http://www.youtube.com/watch?v=");
  220.              if (check_result!=NULL) {
  221.                  search_pointer=check_result+(36+11)*sizeof(char);
  222.                  for (i=0; i<11; i++) {
  223.                      videoid[i]=check_result[i+36];
  224.                  }
  225.                  videoid[11]='\0';
  226.                  counter=counter+1;
  227.                  fprintf(stdout, "        VIDEO : ");
  228.                  verify_youtube_video(videoid);
  229.                  sleep(1); // SLEEP(1), OR USLEEP(1000000), IS NECESSARY HERE FOR CORRECT RESULTS
  230.              }
  231.          }
  232.     }
  233.  
  234.     /* cleanup curl stuff */
  235.     curl_easy_cleanup(curl_handle);
  236.  
  237.     if(chunk.memory)
  238.         free(chunk.memory);
  239.  
  240.     /* we're done with libcurl, so clean it up */
  241.     curl_global_cleanup();
  242.  
  243.     return counter;
  244. }
  245.  
  246. int verify_youtube_video(char *videoid)
  247. {
  248.     // CURL standard section 1 : START
  249.    
  250.     CURL *curl_handle;
  251.     CURLcode res;
  252.  
  253.     struct MemoryStruct chunk;
  254.  
  255.     chunk.memory = malloc(1);  /* will be grown as needed by the realloc above */
  256.     chunk.size = 0;            /* no data at this point */
  257.  
  258.     curl_global_init(CURL_GLOBAL_ALL);
  259.  
  260.     curl_handle = curl_easy_init(); /* init the curl session */
  261.  
  262.     // CURL standard section 1 : END
  263.  
  264.     curl_easy_setopt(curl_handle, CURLOPT_HEADER, 1); // HERE, WE NEED A HEADER ONLY,
  265.     curl_easy_setopt(curl_handle, CURLOPT_NOBODY, 1); // NOTHING ELSE
  266.  
  267.     const char *gdata = "http://gdata.youtube.com/feeds/api/videos/";
  268.     size_t gdata_len = strlen(gdata);
  269.  
  270.     //size_t videoid_len = strlen(videoid);
  271.     size_t videoid_len = 11; // videoid length is always 11
  272.  
  273.     char *curl_url = malloc(gdata_len + videoid_len + 1);
  274.     if (!curl_url) return -1;
  275.  
  276.     memcpy(curl_url, gdata, gdata_len);
  277.     memcpy(curl_url + gdata_len, videoid, videoid_len);
  278.     curl_url[gdata_len + videoid_len] = '\0';
  279.  
  280.     // CURL standard section 2 : START
  281.  
  282.     /* specify URL to get */
  283.     curl_easy_setopt(curl_handle, CURLOPT_URL, curl_url);
  284.  
  285.     /* send all data to this function  */
  286.     curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
  287.  
  288.     /* we pass our 'chunk' struct to the callback function */
  289.     curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *)&chunk);
  290.  
  291.     /* some servers don't like requests that are made without a user-agent field, so we provide one */
  292.     curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, "libcurl-agent/1.0");
  293.  
  294.     /* get it! */
  295.     res = curl_easy_perform(curl_handle);
  296.  
  297.     /* check for errors */
  298.     if(res != CURLE_OK) {
  299.         fprintf(stderr, "curl_easy_perform() failed: %s\n", curl_easy_strerror(res));
  300.         return -1;
  301.     }
  302.     // CURL standard section 2 : END
  303.     else {
  304.         /*
  305.          * Now, our chunk.memory points to a memory block that is chunk.size
  306.          * bytes big and contains the remote file.
  307.          *
  308.          * Do something nice with it!
  309.          */
  310.  
  311.          char *check_result;
  312.  
  313.          check_result = strstr(chunk.memory, "200 OK");
  314.          if (check_result!=NULL) {
  315.              fprintf(stdout, "%s - 200 OK\n", videoid);
  316.                  fflush(stdout);
  317.          }
  318.  
  319.          check_result = strstr(chunk.memory, "400 Bad Request");
  320.          if (check_result!=NULL) {
  321.              fprintf(stdout, "%s - 400 Bad Request\n", videoid);
  322.                  fflush(stdout);
  323.          }
  324.  
  325.          check_result = strstr(chunk.memory, "403 Forbidden");
  326.          if (check_result!=NULL) {
  327.              fprintf(stdout, "%s - 403 Forbidden\n", videoid);
  328.                  fflush(stdout);
  329.           }
  330.  
  331.           check_result = strstr(chunk.memory, "404 Not Found");
  332.           if (check_result!=NULL) {
  333.               fprintf(stdout, "%s - 404 Not Found\n", videoid);
  334.                   fflush(stdout);
  335.           }
  336.     }
  337.  
  338.     /* cleanup curl stuff */
  339.     curl_easy_cleanup(curl_handle);
  340.  
  341.     if(chunk.memory)
  342.         free(chunk.memory);
  343.  
  344.     /* we're done with libcurl, so clean it up */
  345.     curl_global_cleanup();
  346.  
  347.     return 0;
  348. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement