Advertisement
Guest User

ESC Scraper

a guest
Jan 24th, 2018
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. var https = require('https');
  2.  
  // Alphabet the suffix counters index into: lower-case letters, upper-case
  // letters, then digits.
  const charList = 'qwertzuiopasdfghjklyxcvbnmQWERTZUIOPASDFGHJKLYXCVBNM1234567890';

  // Common prefix of every probed URL; getNextUrlAppendix() supplies the
  // six-character suffix that is appended to it.
  const plainURL = "https://showcase-emp.ebu.ch/showcase/embedded.html?tag=dm9kOz";

  // Unused at the moment:
  //const appendedLength = 6;
  //var runningRequests = 0;
  //var maxRequests = 1;

  // Start values: one index into charList per suffix position
  // (a is the leftmost character, f the rightmost).
  // Alternative start values kept from the original (presumably to resume an
  // earlier run - confirm before using):
  //   a = 14, b = 5, c = 51, d = 38, e = 45, f = 45
  let a = 0;
  let b = 0;
  let c = 0;
  let d = 0;
  let e = 0;
  let f = 0;

  // Examples of complete URLs:
  //   https://showcase-emp.ebu.ch/showcase/embedded.html?tag=dm9kOzgzMDY1
  //   https://showcase-emp.ebu.ch/showcase/embedded.html?tag=dm9kOzgzMDY0

  // Starting several request chains from a loop doesn't work for now
  // (it would be faster), so a single chain is started.
  //for (i = 0; i < 30; i++) {
  checkNexturl();
  //}
  27.  
  28.  
  /**
   * Returns the text between the opening `<div class="desc_file">` tag and the
   * next `</div>`, or null when either marker is missing.
   *
   * @param {string} pageData - Full HTML of the fetched page.
   * @returns {string|null} The raw title markup, or null if not found.
   */
  function extractTitle(pageData) {
      const marker = "<div class=\"desc_file\">";
      const start = pageData.indexOf(marker);
      if (start === -1) {
          return null;
      }
      const end = pageData.indexOf("</div>", start);
      if (end === -1) {
          return null;
      }
      return pageData.substring(start + marker.length, end);
  }

  /**
   * Fetches the next candidate URL and prints one result line for it.
   *
   * The suffix comes from getNextUrlAppendix(); when that returns "DONE" the
   * chain stops. A page containing "Video not available" is logged as
   * "NOK;<url>", anything else as "OK ;<url>" followed by "; Title;<title>"
   * when a desc_file div is present. After a response has ended, or after a
   * request error, the function calls itself again for the next URL, so the
   * requests of one chain run strictly one after another.
   */
  function checkNexturl() {
      const urlAppender = getNextUrlAppendix();

      if (urlAppender === "DONE") {
          return;
      }

      // Declared locally (the original leaked an implicit global) so every
      // callback below reports the URL of its own request.
      const completeURL = plainURL + urlAppender;

      https.get(completeURL, function (res) {
          let pageData = "";

          // Decode as UTF-8 in the stream so a multi-byte character split
          // across two chunks is not corrupted by per-chunk string conversion.
          res.setEncoding('utf8');
          res.on('data', function (chunk) {
              pageData += chunk;
          });

          res.on('end', function () {
              if (pageData.indexOf("Video not available") > -1) {
                  console.log("NOK;" + completeURL);
              }
              else {
                  // Build the whole line first so it is written in one piece.
                  let line = "OK ;" + completeURL;
                  const title = extractTitle(pageData);
                  if (title !== null) {
                      line += "; Title;" + title;
                  }
                  process.stdout.write(line + "\n");
              }
              checkNexturl(); //call the next url to fetch
          });

      }).on('error', function (err) {
          // The original read `options.host`, but `options` was never defined,
          // so the handler itself threw; report the failing URL instead.
          console.log("Error: " + completeURL + "\n" + err.message);
          checkNexturl(); //call the next url to fetch
      });
  }
  72.  
  73.  
  /**
   * Returns the suffix for the current counter state and advances the counters.
   *
   * The module-level indices a..f each select one character of charList
   * (a is the leftmost character, f the rightmost). Together they behave like
   * a six-digit odometer in base charList.length: f is incremented on every
   * call and overflow carries leftwards.
   *
   * @returns {string} The six-character suffix, or "DONE" once every
   *     combination has been handed out.
   */
  function getNextUrlAppendix() {
      // Check for exhaustion BEFORE building the suffix. The original checked
      // after incrementing, so the call that should have returned the very last
      // combination returned "DONE" and that suffix was never probed.
      if (a >= charList.length) {
          return "DONE";
      }

      const newUrl = charList[a] + charList[b] + charList[c] + charList[d] + charList[e] + charList[f];

      // Advance the odometer; each overflow resets its digit and carries left.
      f++;
      if (f >= charList.length) {
          f = 0;
          e++;
      }
      if (e >= charList.length) {
          e = 0;
          d++;
      }
      if (d >= charList.length) {
          d = 0;
          c++;
      }
      if (c >= charList.length) {
          c = 0;
          b++;
      }
      if (b >= charList.length) {
          b = 0;
          // When a overflows it stays out of range, which the guard at the top
          // turns into "DONE" on the next call.
          a++;
      }

      return newUrl;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement