Advertisement
AbdulMuttaqin

Proxy Scraper

Apr 3rd, 2019
1,324
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 1.45 KB | None | 0 0
  1. <?php
  2.  
  /*
   * Scrape the nntime.com proxy list pages and save every "ip:port" entry to proxies.txt.
   *
   * Each page hides the port digits behind JavaScript variables: an inline script assigns
   * the variables, and each table row prints its port with document.write(":"+a+b+...).
   * The loop below rebuilds the variable table for each page and substitutes it into the
   * port expression of every row.
   */
  $pages = 30; // # of pages to scrape (max 30)

  // Enforce the documented 1-30 range so an out-of-range value cannot trigger extra requests.
  $pages = max(1, min(30, (int) $pages));

  $proxies = [];

  // One cURL handle is reused for every page; only the URL changes per request.
  $ch = curl_init();
  curl_setopt_array($ch, [
      CURLOPT_FOLLOWLOCATION => true,
      CURLOPT_RETURNTRANSFER => true,
      CURLOPT_TIMEOUT => 15,
  ]);

  for ($page = 1; $page <= $pages; $page++) {
      // Page numbers are zero-padded to two digits in the URL (proxy-list-01.htm, ...).
      $url = "http://nntime.com/proxy-list-" . sprintf("%02d", $page) . ".htm";
      curl_setopt($ch, CURLOPT_URL, $url);
      $data = curl_exec($ch);

      // curl_exec() returns false on failure, and an empty body has nothing to parse.
      if (!is_string($data) || $data === '') {
          continue;
      }

      // Build the name => value table from the inline script's "name=value;" assignments.
      $assignments = get_between($data, '</script><script type="text/javascript">', "</script>");
      $variables = [];
      foreach (explode(";", $assignments) as $assignment) {
          $pair = explode("=", $assignment, 2);
          if (count($pair) !== 2) {
              // Not an assignment (e.g. the empty fragment after a trailing ";").
              continue;
          }

          $name = trim($pair[0]);
          if ($name !== '') {
              $variables[$name] = trim($pair[1]);
          }
      }

      // Without the table the ports cannot be decoded, so the page yields nothing usable.
      if ($variables === []) {
          continue;
      }

      // preg_match_all() returns 0 for no rows and false on a PCRE error; skip the page in both cases.
      if (!preg_match_all('/onclick="choice\(\)" \/><\/td>(.*?)<\/script><\/td>/si', $data, $getProxies)) {
          continue;
      }

      foreach ($getProxies[1] as $proxyRaw) {
          $proxyIP = trim(get_between($proxyRaw, "<td>", "<script type"));

          // Drop the "+" concatenation operators, then replace each variable name by its value.
          $portExpression = get_between($proxyRaw, 'document.write(":"+', ")");
          $proxyPort = strtr(str_replace("+", "", $portExpression), $variables);

          // Keep only rows that decoded into a real IP address and an all-digit port.
          if (filter_var($proxyIP, FILTER_VALIDATE_IP) === false || !ctype_digit($proxyPort)) {
              continue;
          }

          $proxies[] = $proxyIP . ":" . $proxyPort;
      }
  }

  curl_close($ch);

  // One proxy per line.
  $saveFile = "proxies.txt";
  file_put_contents($saveFile, implode("\n", $proxies));
  48.  
  /**
   * Return the text between the first occurrence of $start and the next occurrence of $end.
   *
   * @param string $content Text to search.
   * @param string $start   Delimiter that precedes the wanted text.
   * @param string $end     Delimiter that follows the wanted text.
   *
   * @return string The enclosed text. When $end does not occur after $start, everything
   *                after $start is returned. When $start is not found, or either
   *                delimiter is empty, "" is returned.
   */
  function get_between($content, $start, $end)
  {
      $content = (string) $content;
      $start = (string) $start;
      $end = (string) $end;

      // An empty delimiter cannot mark a position; report "nothing found" rather than fail.
      if ($start === '' || $end === '') {
          return "";
      }

      $from = strpos($content, $start);
      if ($from === false) {
          return "";
      }
      $from += strlen($start);

      // Search for $end only after $start, so a repeated $start inside the span is kept.
      $to = strpos($content, $end, $from);
      if ($to === false) {
          return substr($content, $from);
      }

      return substr($content, $from, $to - $from);
  }
  57.  
  // Print the saved list back to the caller. A failed read produces no output,
  // which is also what echoing the false return value would print.
  $savedList = file_get_contents('proxies.txt');
  if ($savedList !== false) {
      echo $savedList;
  }
  59.  
  60.  
  61. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement