NokitaKaze

anidb page downloader

Nov 3rd, 2014
258
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 4.03 KB | None | 0 0
  1. <?php
  2. /*
  3.  * Качаем данные с anidb
  4.  */
  5.  set_time_limit(0);
  6.  $folder       ='/tmp';// change save folder
  7.  $cookie_string=':NO:COOKIE:';// insert here default_tabs, adbsess, anidbsettings & adbuin
  8.  @mkdir($folder);
  9.  $sad_contype  ='text/plain; ';
  10.  header('Content-type: '.$sad_contype);
  11.  
  12.  $max_id =10500;// change this constant
  13.  
  14. // Берём список прокси
  15.  $ch     =curl_init('http://example.com/proxy/get.php?show_ip=0&order=rand');// lol, change this to any trusted proxy list
  16.  curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
  17.  $buf    =curl_exec($ch);
  18.  $json   =json_decode($buf);
  19.  if (gettype($json)!='object'){
  20.   echo 'Can not get proxy list';
  21.   return;
  22.  }
  23.  if ($json->status!=0){
  24.   echo 'Can not get proxy list. Status #'.$json->status;
  25.   return;
  26.  }
  27.  
  28.  
  29. // Создаём треды
  30.  $threads=array();
  31.  foreach ($json->list as $proxy){
  32.   $threads[]=(object)array(
  33.    'proxy'     =>$proxy,
  34.    'user_agent'=>'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/38.0.2125.104 Safari/537.36',// array_rand($useragents),
  35.    'last_get'  =>0,// Последнее время использования
  36.    'next_get'  =>0,// Время следующего использования
  37.   );
  38.  }
  39.  
  40. // Начинаем парсить
  41.  curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 3);
  42.  curl_setopt($ch, CURLOPT_TIMEOUT,        3);
  43.  
  44.  while (true){
  45.   $k =0;
  46.   $u =false;
  47.   while ($k++<100000){
  48.    $id=mt_rand(1, $max_id);
  49.    $filename=$folder.'/dmp-'.$id.'.html';
  50.    if (!file_exists($filename)){
  51.     $u=true;break;
  52.    }
  53.    if (filemtime($filename)<time()-7*24*3600){
  54.     $u=true;break;
  55.    }
  56.   }
  57.  
  58.   if (!$u){
  59.    echo 'There is no undownloaded files. Bye bye';
  60.    return;
  61.   }
  62.  
  63.   echo "\nanime #".$id;
  64.   $min_last_get_id=null;
  65.   foreach ($threads as $thread_id => &$thread){
  66.    if ($thread->next_get>microtime(true)){continue;}
  67.    if ($min_last_get_id===null){
  68.     $min_last_get_id=$thread_id;
  69.    }else{
  70.     if ($thread->last_get<$threads[$min_last_get_id]->last_get){
  71.      $min_last_get_id=$thread_id;
  72.     }
  73.    }
  74.   }
  75.  
  76.   if ($min_last_get_id===null){
  77.    echo "\nNot a single thread is alive";
  78.    flush();
  79.    sleep(10);
  80.   }
  81.   $thread=&$threads[$min_last_get_id];
  82.   echo ", thread #".$min_last_get_id.' (his last get was '.
  83.    (($thread->last_get==0) ? 'never' : gmdate('Y-m-d H:i:sO', $thread->last_get)).')';
  84.   flush();
  85.  
  86.   curl_setopt($ch, CURLOPT_PROXY,     $thread->proxy);
  87.   curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);
  88.   curl_setopt($ch, CURLOPT_URL,       'http://anidb.net/perl-bin/animedb.pl?show=anime&aid='.$id);
  89.   curl_setopt($ch, CURLOPT_HTTPHEADER, array(
  90.    'Cookie: '.$cookie_string,
  91.    'Cache-Control: max-age=0',
  92.    'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
  93.    'User-agent: '.$thread->user_agent,
  94.    'Referer: http://anidb.net/perl-bin/animedb.pl?show=main',
  95.    'Accept-Encoding: gzip,deflate,sdch',
  96.    'Accept-Language: ru,en;q=0.8',
  97.   ));
  98.  
  99.   $a=microtime(true);
  100.   $buf=curl_exec($ch);
  101.   $b=microtime(true);
  102.   $thread->last_get=$b;
  103.   $thread->next_get=$thread->last_get+10;
  104.   echo '; bytes='.strlen($buf).', '.round($b-$a,2).' sec';
  105.  
  106.   if (strlen($buf)==0){
  107.    echo "; can not load that page, null response. Reload";flush();
  108.    $thread->next_get=$thread->last_get+300;
  109.    continue;
  110.   }
  111.   flush();
  112.   $buf=@gzdecode($buf);
  113.   echo ', real size is '.strlen($buf).' bytes';
  114.  
  115.   if (strlen($buf)<1000){
  116.    echo "; Too few bytes. Reload";flush();
  117.    $thread->next_get=$thread->last_get+300;
  118.    continue;
  119.   }
  120.   if (preg_match('|YOU HAVE BEEN AUTO\\-BANNED|i', $buf)){
  121.    echo "; This proxy is banned. Reload";flush();
  122.    $thread->next_get=$thread->last_get+6*3600+10;
  123.    continue;
  124.   }
  125.   if (preg_match('|Unknown anime id|i', $buf)){
  126.    echo "; This anime is not exists. It is okay";flush();
  127.   }
  128.   if (preg_match('|show\\=signup|i', $buf)){
  129.    echo "; Warning, we have signed out!";flush();
  130.   }
  131.  
  132.   // Сохраняем
  133.   file_put_contents($filename, $buf);
  134.  }
  135.  
  136.  
  137.  
  138.  
  139. ?>
Add Comment
Please, Sign In to add comment