Advertisement
_Tobias

ZippyShare PHP scraping script 0.1

Jun 19th, 2013
188
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 2.20 KB | None | 0 0
  <?php

  /**
   * ZippyShare Scraper v0.1
   * by _Tobias
   *
   * Usage:
   *   $list  an array with full ZippyShare download page links
   *   $dlf   the folder in which the files will be downloaded (created if missing)
   * Run from CLI please!
   *
   * To read the links from a file instead of hard-coding them:
   *   $list = file('FileWithNewlineSeparatedLinks.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
   *
   * For every link the script fetches the download page, reconstructs the
   * real download URL from the JavaScript embedded in the page, replays the
   * JSESSIONID cookie the page handed out, and saves the file as
   * "<dlf><n>. <title>", where <n> is the 1-based position of the link among
   * the non-empty entries of $list. A link whose page cannot be fetched or
   * parsed is reported on STDERR and skipped; the remaining links are still
   * processed.
   */

  if (php_sapi_name() != 'cli') {
      // STDERR is only defined under the CLI SAPI, so plain die() is used here.
      die('Run this script from CLI!'.PHP_EOL);
  }

  /**
   * Evaluate a tiny arithmetic / string-concatenation expression scraped from a page.
   *
   * SECURITY NOTE: this still relies on eval() of text that originates from a
   * remote server. The input is therefore restricted to a whitelist first:
   * double-quoted string literals without backslashes or "$" (so PHP cannot
   * interpolate or unescape anything) plus digits, whitespace, parentheses and
   * the operators + - * / % and "." — no identifiers, hence no function calls
   * or variable access. Anything else is rejected without being evaluated.
   *
   * @param string $expr PHP expression text, e.g. '"/d/1/" . (12%5) . "/f.zip"'
   * @return int|float|string|null the computed value, or null when the expression
   *                               is rejected or cannot be evaluated
   */
  function zs_eval_expression($expr)
  {
      if (!is_string($expr)
          || preg_match('/^(?:"[^"\\\\$]*"|[0-9+\-*\/%().\s])+$/D', $expr) !== 1) {
          return null;
      }

      try {
          $value = eval('return '.$expr.';');
      } catch (Throwable $e) {
          // PHP 7+: ParseError, DivisionByZeroError and friends.
          return null;
      }

      // PHP 5 reports a parse error inside eval() by returning false.
      return ($value === false || $value === null) ? null : $value;
  }

  $list = array('http://www53.zippyshare.com/v/8362983/file.html');

  $dlf = 'downloads/';

  // Make sure the folder name can be used as a plain prefix for file names.
  if ($dlf !== '' && substr($dlf, -1) !== '/' && substr($dlf, -1) !== '\\') {
      $dlf .= '/';
  }

  // The second is_dir() covers the case where the folder appeared meanwhile.
  if (!is_dir($dlf) && !mkdir($dlf, 0777, true) && !is_dir($dlf)) {
      fwrite(STDERR, "Can't create download folder".PHP_EOL);
      exit(1);
  }

  // Running number used as file name prefix (was an undefined variable before).
  $no = 0;

  foreach ($list as $v) {
      // Links read from a file may carry "\r" or stray blanks.
      $v = trim($v);
      if ($v === '') {
          continue;
      }
      $no++;

      $html = file_get_contents($v);
      if ($html === false) {
          fwrite(STDERR, 'Skipping '.$v.': could not fetch the download page'.PHP_EOL);
          continue;
      }

      // Grab the response headers right away, before any other HTTP request
      // made in this scope can replace them.
      $pageHeaders = isset($http_response_header) ? $http_response_header : array();

      // get download url
      $url = null;
      if (preg_match('/var a = ([0-9%]+?);/', $html, $matches)) { // with var
          $token = zs_eval_expression($matches[1]);
          if ($token !== null && preg_match('/href = "(.+?)";/', $html, $matches)) {
              $url = str_replace('"+a+"', $token, $matches[1]);
          }
      } elseif (preg_match('/href = (.+?);/', $html, $matches)) { // without var, older servers
          // JS "+" concatenation becomes PHP "."; the padding spaces keep
          // `".123."` from being parsed as a float literal.
          $expr = str_replace(array('"+', '+"'), array('" . ', ' . "'), $matches[1]);
          $url = zs_eval_expression($expr);
      }
      if (!is_string($url) || $url === '') {
          fwrite(STDERR, 'Skipping '.$v.': could not determine the download url'.PHP_EOL);
          continue;
      }

      // get size (only used for the progress message)
      $size = preg_match('/<font style=".+?">([0-9]{1}.+?B)<\/font>/', $html, $matches)
          ? $matches[1]
          : 'unknown size';

      // get title
      if (!preg_match('/<meta property="og:title" content="(.+?) " \/>/', $html, $matches)) {
          fwrite(STDERR, 'Skipping '.$v.': could not find the file title'.PHP_EOL);
          continue;
      }
      $title = $matches[1];

      // get server number, from the page first and from the link as a fallback
      if (!preg_match('/www([0-9]+?)\./', $html, $matches)
          && !preg_match('/www([0-9]+?)\./', $v, $matches)) {
          fwrite(STDERR, 'Skipping '.$v.': could not find the server number'.PHP_EOL);
          continue;
      }

      // assemble url
      $url = 'http://www'.$matches[1].'.zippyshare.com'.$url;

      // get session cookie
      $header = '';
      foreach ($pageHeaders as $h) {
          if (stripos($h, 'Set-Cookie: JSESSIONID=') === 0
              && preg_match('/JSESSIONID=([A-F0-9]+?);/', $h, $matches)) {
              $header = 'Cookie: JSESSIONID='.$matches[1];
          }
      }

      // create a context for the download
      $context = stream_context_create(array('http' => array('method' => 'GET', 'header' => $header)));

      // assemble file name; the title comes from the remote page, so it must
      // not be able to escape the download folder
      $fn = $dlf.$no.'. '.str_replace(array('/', '\\', "\0"), '_', $title);

      // start download
      if (is_file($fn)) {
          echo 'Skipping '.$title.' ('.$size.')'.PHP_EOL;
          continue;
      }
      echo 'Downloading '.$title.' ('.$size.')'.PHP_EOL;

      $data = file_get_contents($url, false, $context);
      if ($data === false) {
          // Do not leave an empty file behind: it would be mistaken for a
          // finished download on the next run.
          fwrite(STDERR, 'Download failed: '.$url.PHP_EOL);
          continue;
      }
      if (file_put_contents($fn, $data) === false) {
          fwrite(STDERR, 'Could not write '.$fn.PHP_EOL);
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement