Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <pre>
- <?php
/**
 * Fetch a URL over HTTP(S) with cURL, sending browser-like request headers,
 * and return the response body.
 *
 * Redirects are followed and compressed responses are decoded by cURL.
 * When the transfer fails, or when the body contains one of the site's
 * "unusual traffic" / rate-limit markers, a message is echoed and an empty
 * string is returned so callers can treat "blocked" and "failed" alike.
 *
 * @param string       $url     Absolute URL to fetch.
 * @param string|false $referer Value for the Referer header; false (or '')
 *                              omits the header entirely.
 *
 * @return string Response body, or '' on transfer error / captcha page.
 */
function &request($url, $referer = false)
{
    // Client-hint values use plain double quotes: inside a single-quoted PHP
    // string a backslash before " is kept literally, which would put stray
    // backslashes on the wire.
    $headers = array();
    $headers[] = 'Authority: rekvizitai.vz.lt';
    $headers[] = 'Cache-Control: max-age=0';
    $headers[] = 'Sec-Ch-Ua: ";Not A Brand";v="99", "Chromium";v="94"';
    $headers[] = 'Sec-Ch-Ua-Mobile: ?0';
    $headers[] = 'Sec-Ch-Ua-Platform: "Linux"';
    $headers[] = 'Upgrade-Insecure-Requests: 1';
    $headers[] = 'User-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.81 Safari/537.36';
    $headers[] = 'Sec-Fetch-Site: same-origin';
    $headers[] = 'Sec-Fetch-Mode: navigate';
    $headers[] = 'Sec-Fetch-User: ?1';
    $headers[] = 'Sec-Fetch-Dest: document';
    // Only send a Referer when the caller supplied one; concatenating false
    // would produce an empty "Referer: " header.
    if ($referer !== false && $referer !== '') {
        $headers[] = 'Referer: ' . $referer;
    }
    $headers[] = 'Accept-Language: en-GB,en-US;q=0.9,en;q=0.8,lt-LT;q=0.7,lt;q=0.6,ru;q=0.5';
    // NOTE(review): hard-coded session cookie; it will expire and should not
    // live in source control.
    $headers[] = 'Cookie: _ga=GA1.2.1267708846.1633970398; PHPSESSID=jju8t73qe97iiiej2f1bapv6gr; _gid=GA1.2.702172919.1635163877';

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
    curl_setopt($ch, CURLOPT_ENCODING, 'gzip, deflate');
    curl_setopt($ch, CURLOPT_HTTPHEADER, $headers);
    // Proxy settings, currently disabled.
    //curl_setopt($ch, CURLOPT_HTTPPROXYTUNNEL, true);
    //curl_setopt($ch, CURLOPT_PROXY, 'localhost:8118');
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
    // Bound the wait so a stalled connection cannot hang the crawl forever.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
    curl_setopt($ch, CURLOPT_TIMEOUT, 120);

    $html = curl_exec($ch);
    if ($html === false || curl_errno($ch)) {
        echo 'Error:' . curl_error($ch);
        // Normalise a failed transfer to an empty body so callers never
        // receive boolean false.
        $html = "";
    }
    curl_close($ch);

    // The site answers with a captcha / "slow down" page when it detects
    // scraping; report it and hand back an empty body.
    if (strpos($html, 'IsUnusualTrafficFormValid') !== false || strpos($html, 'Oi..! Neskubėkite!') !== false) {
        echo("captcha on ".$url);
        $html = "";
    }

    // Returned through a variable because the function returns by reference.
    return $html;
}
/**
 * Format a byte count as a human-readable size using 1024-based units.
 *
 * Examples: 500 -> "500 B", 1536 -> "1.5 KB", 1073741824 -> "1 GB".
 * A zero/empty value yields the bare string "0" (no unit).
 *
 * @param int|float $bytes Number of bytes; negative values keep their sign.
 * @param int       $prec  Number of decimal places to round to.
 *
 * @return string Rounded value, a space, and the unit (B, KB, MB, ... YB).
 */
function cFileSize($bytes, $prec = 2)
{
    if (!$bytes) {
        return '0';
    }

    $units = array('', 'K', 'M', 'G', 'T', 'P', 'E', 'Z', 'Y');
    $lastUnit = count($units) - 1;

    // Find the unit by repeated division instead of log(): the logarithm
    // ratio can land just below a whole number at exact powers of 1024 and
    // is undefined for negative input. The exponent is also clamped to the
    // unit table so it can never index outside of it.
    $magnitude = abs($bytes);
    $exp = 0;
    while ($magnitude >= 1024 && $exp < $lastUnit) {
        $magnitude /= 1024;
        $exp++;
    }

    $scaled = $bytes / pow(1024, $exp);

    return round($scaled, (int) $prec) . ' ' . $units[$exp] . 'B';
}
/**
 * Parse command-line arguments from $GLOBALS['argv'] into an options map.
 *
 * Recognised forms:
 *   --name=value  => ['name' => 'value']  (value may itself contain '=')
 *   -flag, --flag => ['flag' => true]
 *
 * The script name (argv[0]) and arguments that do not start with a dash are
 * ignored, so a path or positional argument that merely contains '-' is no
 * longer mistaken for a flag.
 *
 * @return array Map of option name => string value or true.
 */
function parseParams()
{
    $params = array();

    // argv is absent outside the CLI; treat that as "no arguments".
    $argv = isset($GLOBALS['argv']) && is_array($GLOBALS['argv']) ? $GLOBALS['argv'] : array();

    // Skip argv[0], which is the script path rather than an option.
    foreach (array_slice($argv, 1) as $arg) {
        if (preg_match('/^--([^=]+)=(.*)$/s', $arg, $reg)) {
            $params[$reg[1]] = $reg[2];
        } elseif (preg_match('/^--?([a-z0-9_][a-z0-9_-]*)$/i', $arg, $reg)) {
            $params[$reg[1]] = true;
        }
    }

    return $params;
}
/**
 * Walks the category listing pages of rekvizitai.vz.lt and records the
 * company identifiers found on each page.
 *
 * State is kept in two JSON files next to this script:
 *   - repos/imones.dat: the list of categories to walk (read only);
 *   - repos/medis.dat:  results as $medis[category][identifier] = 1, plus a
 *     resume marker under $medis['fetch_info']['last_cat'].
 */
class walkcat
{
    /** @var array Categories to walk; each value is used as a URL path segment. */
    public $kategorijos = array();

    /** @var array Collected results and the 'fetch_info' resume marker. */
    public $medis = array();

    /** @var array Command-line options as returned by parseParams(). */
    public $params = array();

    /**
     * Load the category list and previously saved results, and parse CLI options.
     */
    public function __construct()
    {
        $this->kategorijos = $this->loadJson(__DIR__.'/repos/imones.dat');
        $this->medis = $this->loadJson(__DIR__.'/repos/medis.dat');
        $this->params = parseParams();
    }

    /**
     * Read a JSON file and decode it to an array.
     *
     * @param string $path File to read.
     *
     * @return array Decoded data, or an empty array when the file is missing,
     *               unreadable, or does not decode to an array.
     */
    private function loadJson($path)
    {
        if (!is_readable($path)) {
            return array();
        }

        $raw = file_get_contents($path);
        $data = $raw === false ? null : json_decode($raw, true);
        if (!is_array($data)) {
            echo "Warning: could not parse $path\n";
            return array();
        }

        return $data;
    }

    /**
     * Download one listing page of a category.
     *
     * The previous page (or page 1 itself for the first page) is sent as the
     * Referer. The raw HTML is also written to repos/lastcat.html, and the
     * method sleeps two seconds after each fetch to throttle the crawl.
     *
     * @param string $cat  Category path segment.
     * @param int    $page 1-based page number.
     *
     * @return string Page HTML ('' when request() reported a failure or captcha).
     */
    public function getCat($cat, $page = 1)
    {
        $url = "https://rekvizitai.vz.lt/imones/".$cat."/".$page;
        // The referer must not overwrite $url: a chained assignment here
        // would make every fetch download the previous page instead.
        $referer = "https://rekvizitai.vz.lt/imones/".$cat."/".($page == 1 ? '1' : $page - 1);
        $html = request($url, $referer);
        echo "Fetch $cat page $page - $url | ".cFileSize(memory_get_usage())."\n";
        file_put_contents(__DIR__.'/repos/lastcat.html', $html);
        sleep(2);
        return $html;
    }

    /**
     * Print the response of http://addr.gw.lt/ (echoed with an "IP:" prefix).
     */
    public function testAddr()
    {
        echo "IP:".request('http://addr.gw.lt/')."\n";
    }

    /**
     * Restart the local Tor service, printing the address check before and after.
     */
    public function restartTor()
    {
        $this->testAddr();
        echo "restarting tor\n";
        shell_exec("sudo /usr/sbin/service tor restart");
        // Wait for the service to come back before checking the address again.
        sleep(10);
        $this->testAddr();
    }

    /**
     * Walk every page of one category from $startpage to its last page and
     * record the company identifiers found.
     *
     * A page that yields no company links is treated as blocked: progress is
     * saved, Tor is restarted and the same page is fetched again.
     * NOTE(review): the retry is unbounded, so a page that is legitimately
     * empty will be retried forever.
     *
     * @param string $cat       Category path segment.
     * @param int    $startpage 1-based page to start from.
     */
    public function processCat($cat, $startpage)
    {
        // null means "not known yet"; it is only set from a page that was
        // actually parsed, so a blocked first page cannot end the loop early.
        $lastpage = null;
        $i = $startpage;

        while ($lastpage === null || $i <= $lastpage) {
            $html = $this->getCat($cat, $i);

            if ($lastpage === null && preg_match('/Paskutinis puslapis"\>(\d+)\</i', $html, $found)) {
                $lastpage = (int) $found[1];
                echo "Last page: $lastpage\n";
            }

            preg_match_all('/https:\/\/rekvizitai.vz.lt\/imone\/(.[a-z_0-9]+)\//i', $html, $matches);
            echo "found matches ".implode(',', $matches[1]) ."\n";

            if (!count($matches[1])) {
                // Blocked or empty response: remember where we are, get a
                // new exit address and retry the same page.
                $this->save($cat, $i);
                $this->restartTor();
                sleep(10);
                continue;
            }

            if ($lastpage === null) {
                // Results but no "last page" link: treat the category as
                // ending on the current page.
                $lastpage = $i;
                echo "Last page: $lastpage\n";
            }

            foreach ($matches[1] as $kodas) {
                $this->medis[$cat][$kodas] = 1;
            }

            $i++;
        }

        $this->save($cat, $i);
    }

    /**
     * Walk all categories, skipping those already present in the results and
     * resuming the category named by the saved resume marker.
     */
    public function process()
    {
        $this->testAddr();

        $lastCat = false;
        $lastPage = 1;
        if (isset($this->medis['fetch_info']['last_cat']) && is_array($this->medis['fetch_info']['last_cat'])) {
            $marker = $this->medis['fetch_info']['last_cat'];
            print_r($marker);
            $lastCat = isset($marker['cat']) ? $marker['cat'] : false;
            $lastPage = isset($marker['page']) ? (int) $marker['page'] : 1;
        }

        foreach ($this->kategorijos as $cat) {
            $resuming = $lastCat !== false && (string) $lastCat === (string) $cat;

            // Categories that already have results are done, unless this is
            // the one that was interrupted.
            if (isset($this->medis[$cat]) && !$resuming) {
                continue;
            }

            // Step back one page when resuming, but never below page 1.
            $startpage = $resuming ? max(1, $lastPage - 1) : 1;
            $this->processCat($cat, $startpage);
        }
    }

    /**
     * Persist the results to repos/medis.dat together with the resume marker.
     *
     * @param string|false $cat      Category being processed, false for none.
     * @param int|false    $lastpage Page reached in that category, false for none.
     */
    public function save($cat = false, $lastpage = false)
    {
        $this->medis['fetch_info']['last_cat'] = ['cat' => $cat, 'page' => $lastpage];

        $json = json_encode($this->medis, JSON_PRETTY_PRINT);
        if ($json === false) {
            // Writing the failed result would truncate the file and lose
            // everything collected so far.
            echo "Error: could not encode results (json error ".json_last_error()."), medis.dat not written\n";
            return;
        }

        file_put_contents(__DIR__.'/repos/medis.dat', $json, LOCK_EX);
    }
}
// Script entry point: build the crawler, walk every pending category, then
// store the results once more with the default (false/false) resume marker.
$crawler = new walkcat();
$crawler->process();
$crawler->save();
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement