Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
- /*
- Author : Denis Szalkowski Copyright © 2012
- Licence : GNU General Public Licence v3.0
- Cette classe a pour objet de récupérer le contenu d'une url à l'aide de Curl.
- Elle peut être à l'origine d'outils de scraping.
- */
/**
 * Small wrapper around the cURL extension that downloads the body of an
 * HTTP(S) URL with a GET request, presenting itself as a desktop browser.
 *
 * Error reporting follows the original contract of this class:
 * - an invalid URL or an HTTP status >= 400 prints a message and terminates
 *   the script with exit;
 * - a transport failure (DNS, refused connection, timeout...) makes
 *   execHttp() return false.
 */
class Curl
{
    /**
     * Browser User-Agent strings. Only the first entry is currently sent;
     * the others are kept for callers that want to re-enable rotation.
     */
    private static $ua = array(
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.2) Gecko/20100101 Firefox/10.0.2',
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.24 (KHTML, like Gecko) Chrome/19.0.1055.0 Safari/535.24',
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:8.0.1) Gecko/20111117 Firefox/8.0.1',
        'Opera/9.80 (Windows NT 6.1; U; fr) Presto/2.10.229 Version/11.61',
    );

    /**
     * Constructor. Nothing to initialise; kept so that subclasses calling
     * parent::__construct() keep working.
     */
    public function __construct()
    {
    }

    /**
     * Fetches $url with an HTTP GET request and returns the response body.
     *
     * Redirects are followed, cookies are read from and written to
     * "cookie.txt" in the current working directory, and the transfer rate
     * and connect timeout are randomised on every call.
     *
     * @param string $url Absolute URL starting with http:// or https://.
     *
     * @return string|false Response body, or false when the transfer itself
     *                      failed (no HTTP response could be obtained).
     */
    public function execHttp($url)
    {
        if (!preg_match('/^https?:\/\//i', $url)) {
            echo "Erreur : l'url doit être de type http:// ou https://";
            exit;
        }

        // Scheme + host of the target, sent as the initial Referer.
        $referer = self::extractOrigin($url);

        // Randomised connect timeout (seconds) and transfer rate cap (bytes/s).
        $connectTimeout = rand(5, 10);
        $maxSpeed = rand(105000, 235000);

        // User-Agent, Referer and Accept-Encoding are deliberately NOT listed
        // here: they are set through their dedicated options below. A raw
        // header of the same name takes precedence over the one libcurl
        // builds, which would defeat CURLOPT_AUTOREFERER on redirects and
        // advertise encodings that CURLOPT_ENCODING did not ask for.
        $headers = array(
            'Accept: text/plain',
            'Accept-Charset: utf-8',
            'Accept-Language: fr-FR',
            'Cache-Control: no-cache',
            'Content-Type: text/plain',
            'Pragma: no-cache',
        );

        $options = array(
            CURLOPT_URL => $url,
            CURLOPT_USERAGENT => self::$ua[0],
            CURLOPT_COOKIESESSION => 1,
            CURLOPT_COOKIEFILE => 'cookie.txt',
            CURLOPT_COOKIEJAR => 'cookie.txt',
            CURLOPT_HEADER => 0,
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_AUTOREFERER => 1,
            // NOTE(review): certificate and host verification are disabled,
            // as in the original code, so HTTPS responses are not
            // authenticated. Enable both if the fetched content is trusted.
            CURLOPT_SSL_VERIFYHOST => 0,
            CURLOPT_SSL_VERIFYPEER => 0,
            CURLOPT_HTTPHEADER => $headers,
            CURLOPT_HTTPGET => 1,
            CURLOPT_CONNECTTIMEOUT => $connectTimeout,
            CURLOPT_TIMEOUT => 20,
            CURLOPT_FOLLOWLOCATION => 1,
            // To route through Privoxy (ideally coupled with Tor), add:
            // CURLOPT_PROXY => 'http://127.0.0.1:8118'
            CURLOPT_FORBID_REUSE => 1,
            CURLOPT_FRESH_CONNECT => 1,
            CURLOPT_MAXCONNECTS => 1,
            CURLOPT_MAX_RECV_SPEED_LARGE => $maxSpeed,
            CURLOPT_MAX_SEND_SPEED_LARGE => $maxSpeed,
            CURLOPT_ENCODING => 'gzip,deflate',
        );
        if ($referer !== '') {
            $options[CURLOPT_REFERER] = $referer;
        }

        $ch = curl_init();
        if ($ch === false) {
            return false;
        }

        // Options are applied one by one so that a single rejected option
        // does not prevent the remaining ones from being set.
        foreach ($options as $option => $value) {
            curl_setopt($ch, $option, $value);
        }

        // curl_exec() reports failures by returning false, not by throwing.
        $html = curl_exec($ch);
        $httpCode = curl_getinfo($ch, CURLINFO_HTTP_CODE);

        // Release the handle before any exit path.
        curl_close($ch);

        if ($httpCode >= 400) {
            echo 'Code http: ' . $httpCode . "\n";
            exit;
        }

        return $html;
    }

    /**
     * Destructor. No resources are held between calls.
     */
    public function __destruct()
    {
    }

    /**
     * Returns the "scheme://host" prefix of a URL, or an empty string when
     * the URL has no host part.
     *
     * The host ends at the first "/", "?" or "#", so URLs without a path
     * (e.g. "http://example.com") are handled as well.
     */
    private static function extractOrigin($url)
    {
        if (preg_match('/^(https?:\/\/)([^\/?#]+)/i', $url, $parts)) {
            return $parts[1] . $parts[2];
        }

        return '';
    }
}
- //Exemple d'utilisation de la classe
- /*
- require 'curl.class.php';
- ini_set('display_errors', 1);
- $oCurl=new Curl();
- echo $oCurl->execHttp('http://www.dsfc.net/feed/');
- unset($oCurl);
- */
- ?>
RAW Paste Data
Copied