Advertisement
Guest User

Untitled

a guest
Oct 20th, 2017
49
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 38.30 KB | None | 0 0
  1. <?php
  2.  
  3. /*************************************************
  4.  
  5. Snoopy - the PHP net client
  6. Author: Monte Ohrt <monte@ispi.net>
  7. Copyright (c): 1999-2000 ispi, all rights reserved
  8. Version: 1.01
  9.  
  10. * This library is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU Lesser General Public
  12. * License as published by the Free Software Foundation; either
  13. * version 2.1 of the License, or (at your option) any later version.
  14. *
  15. * This library is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * Lesser General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU Lesser General Public
  21. * License along with this library; if not, write to the Free Software
  22. * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  23.  
  24. You may contact the author of Snoopy by e-mail at:
  25. monte@ispi.net
  26.  
  27. Or, write to:
  28. Monte Ohrt
  29. CTO, ispi
  30. 237 S. 70th suite 220
  31. Lincoln, NE 68510
  32.  
  33. The latest version of Snoopy can be obtained from:
  34. http://snoopy.sourceforge.net/
  35.  
  36. *************************************************/
  37.  
  38. class Snoopy
  39. {
  /**** Public variables ****/

  /* user definable vars */

  var $host = "www.php.net"; // host name we are connecting to;
  // fetch() and submit() overwrite it with the host of the requested URI
  var $port = 80; // port we are connecting to;
  // overwritten when the requested URI carries an explicit port
  var $proxy_host = ""; // proxy host to use
  var $proxy_port = ""; // proxy port to use
  var $proxy_user = ""; // proxy user to use (sent as Basic Proxy-Authorization when non-empty)
  var $proxy_pass = ""; // proxy password to use

  var $agent = "Snoopy v1.2.3"; // value of the User-Agent header we send
  var $referer = ""; // value of the Referer header; not sent when empty
  var $cookies = array(); // array of cookies to pass, name => value
  // $cookies["username"]="joe";
  var $rawheaders = array(); // array of raw headers to send, header name => value
  // $rawheaders["Content-type"]="text/html";

  var $maxredirs = 5; // http redirection depth maximum. 0 = disallow
  var $lastredirectaddr = ""; // address of the most recently followed redirect
  var $offsiteok = true; // allows redirection off-site
  var $maxframes = 0; // frame content depth maximum. 0 = disallow
  var $expandlinks = true; // expand links to fully qualified URLs.
  // this only applies to fetchlinks()
  // submitlinks(), and submittext()
  var $passcookies = true; // pass set cookies back through redirects
  // NOTE: this currently does not respect
  // dates, domains or paths.

  var $user = ""; // user for http authentication
  var $pass = ""; // password for http authentication

  // http accept types (value of the Accept header)
  var $accept = "image/gif, image/x-xbitmap, image/jpeg, image/pjpeg, */*";

  var $results = ""; // where the content is put: a string, or an array
  // with one entry per fetched document once frames are followed

  var $error = ""; // error messages sent here
  var $response_code = ""; // raw status line ("HTTP/...") returned from server
  var $headers = array(); // headers returned from server sent here, one raw line per entry
  var $maxlength = 500000; // max return data length (body);
  // _httprequest() uses it as the fread() chunk size and keeps
  // reading until the stream is exhausted
  var $read_timeout = 0; // timeout on read operations, in seconds
  // supported only since PHP 4 Beta 4
  // set to 0 to disallow timeouts
  var $timed_out = false; // if a read operation timed out
  var $status = 0; // http request status; set to -100 when a read times out

  var $temp_dir = "/tmp"; // temporary directory that the webserver
  // has permission to write to.
  // under Windows, this should be C:\temp

  var $curl_path = "/usr/local/bin/curl";
  // Snoopy will use cURL for fetching
  // SSL content if a full system path to
  // the cURL binary is supplied here.
  // set to false if you do not have
  // cURL installed. See http://curl.haxx.se
  // for details on installing cURL.
  // Snoopy does *not* use the cURL
  // library functions built into php,
  // as these functions are not stable
  // as of this Snoopy release.
  // fetch() and submit() return false for https URIs
  // when this is empty or not executable.

  /**** Private variables ****/

  var $_maxlinelen = 4096; // max line length (headers)

  var $_httpmethod = "GET"; // default http request method (used by fetch())
  var $_httpversion = "HTTP/1.0"; // default http request version
  var $_submit_method = "POST"; // default submit method (used by submit())
  var $_submit_type = "application/x-www-form-urlencoded"; // default submit type
  var $_mime_boundary = ""; // MIME boundary for multipart/form-data submit type
  var $_redirectaddr = false; // will be set if page fetched is a redirect
  var $_redirectdepth = 0; // increments on an http redirect
  var $_frameurls = array(); // frame src urls
  var $_framedepth = 0; // increments on frame depth

  var $_isproxy = false; // set if using a proxy server
  var $_fp_timeout = 30; // timeout for socket connection
  119.  
  120. /*======================================================================*\
  121. Function: fetch
  122. Purpose: fetch the contents of a web page
  123. (and possibly other protocols in the
  124. future like ftp, nntp, gopher, etc.)
  125. Input: $URI the location of the page to fetch
  126. Output: $this->results the output text from the fetch
  127. \*======================================================================*/
  128.  
  /**
   * Fetch a page and store its content in $this->results.
   *
   * Plain http goes through _connect()/_httprequest()/_disconnect(); https
   * goes through _httpsrequest() and requires $this->curl_path to point to an
   * executable. Redirects are followed while $this->maxredirs is greater than
   * the current redirect depth, frame sources while the frame depth is below
   * $this->maxframes; both are followed by calling fetch() again.
   *
   * @param string $URI the location of the page to fetch
   * @return bool false if the protocol is not http/https, the connection
   *              could not be opened, or curl is missing for https;
   *              true otherwise (the request helper's own result is not checked)
   */
  function fetch($URI)
  {
      $URI_PARTS = parse_url($URI);
      if (!is_array($URI_PARTS))
          $URI_PARTS = array(); // unparsable URI: ends up in the "invalid protocol" branch

      if (!empty($URI_PARTS["user"]))
          $this->user = $URI_PARTS["user"];
      if (!empty($URI_PARTS["pass"]))
          $this->pass = $URI_PARTS["pass"];

      $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
      $host   = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';
      $path   = empty($URI_PARTS["path"]) ? '' : $URI_PARTS["path"];
      if (!empty($URI_PARTS["query"]))
          $path .= "?".$URI_PARTS["query"];

      switch (strtolower($scheme))
      {
          case "http":
              $this->host = $host;
              if (!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              if (!$this->_connect($fp))
                  return false;

              // _isproxy is only read after _connect(): with a proxy the
              // entire URI is sent, without one only path + query
              $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_httpmethod);
              $this->_disconnect($fp);
              break;

          case "https":
              if (!$this->curl_path)
                  return false;
              if (function_exists("is_executable") && !is_executable($this->curl_path))
                  return false;

              $this->host = $host;
              if (!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_httpmethod);
              break;

          default:
              // not a valid protocol
              $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
              return false;
      }

      /* url was redirected, check if we've hit the max depth */
      if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
      {
          // Same-site test: http or https, and the host must end at a URL
          // delimiter so "host.evil.tld" is not mistaken for "host".
          $samesite = preg_match(
              "~^https?://".preg_quote($this->host, "~")."(?:[:/?#]|$)~i",
              $this->_redirectaddr
          );

          // only follow redirect if it's on this site, or offsiteok is true
          if ($samesite || $this->offsiteok)
          {
              $this->_redirectdepth++;
              $this->lastredirectaddr = $this->_redirectaddr;
              $this->fetch($this->_redirectaddr);
          }
      }

      if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
      {
          // work on a copy: the nested fetch() calls refill $this->_frameurls
          $frameurls = $this->_frameurls;
          $this->_frameurls = array();

          foreach ($frameurls as $frameurl)
          {
              if ($this->_framedepth >= $this->maxframes)
                  break;
              $this->fetch($frameurl);
              $this->_framedepth++;
          }
      }

      return true;
  }
  267.  
  268. /*======================================================================*\
  269. Function: submit
  270. Purpose: submit an http form
  271. Input: $URI the location to post the data
  272. $formvars the formvars to use.
  273. format: $formvars["var"] = "val";
  274. $formfiles an array of files to submit
  275. format: $formfiles["var"] = "/dir/filename.ext";
  276. Output: $this->results the text output from the post
  277. \*======================================================================*/
  278.  
  /**
   * Submit an http form and store the response in $this->results.
   *
   * The request body is built by _prepare_post_body(). A redirect whose
   * address contains a query string is followed with fetch(); any other
   * redirect is re-submitted with the same form data. Frame sources are
   * fetched while the frame depth is below $this->maxframes.
   *
   * @param string $URI       the location to post the data
   * @param mixed  $formvars  the formvars to use, format: $formvars["var"] = "val";
   * @param mixed  $formfiles files to submit, format: $formfiles["var"] = "/dir/filename.ext";
   * @return bool false if the protocol is not http/https, the connection
   *              could not be opened, or curl is missing for https;
   *              true otherwise (the request helper's own result is not checked)
   */
  function submit($URI, $formvars="", $formfiles="")
  {
      $postdata = $this->_prepare_post_body($formvars, $formfiles);

      $URI_PARTS = parse_url($URI);
      if (!is_array($URI_PARTS))
          $URI_PARTS = array(); // unparsable URI: ends up in the "invalid protocol" branch

      if (!empty($URI_PARTS["user"]))
          $this->user = $URI_PARTS["user"];
      if (!empty($URI_PARTS["pass"]))
          $this->pass = $URI_PARTS["pass"];

      $scheme = isset($URI_PARTS["scheme"]) ? $URI_PARTS["scheme"] : '';
      $host   = isset($URI_PARTS["host"]) ? $URI_PARTS["host"] : '';
      $path   = empty($URI_PARTS["path"]) ? '' : $URI_PARTS["path"];
      if (!empty($URI_PARTS["query"]))
          $path .= "?".$URI_PARTS["query"];

      switch (strtolower($scheme))
      {
          case "http":
              $this->host = $host;
              if (!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              if (!$this->_connect($fp))
                  return false;

              // _isproxy is only read after _connect(): with a proxy the
              // entire URI is sent, without one only path + query
              $this->_httprequest($this->_isproxy ? $URI : $path, $fp, $URI, $this->_submit_method, $this->_submit_type, $postdata);
              $this->_disconnect($fp);
              break;

          case "https":
              if (!$this->curl_path)
                  return false;
              if (function_exists("is_executable") && !is_executable($this->curl_path))
                  return false;

              $this->host = $host;
              if (!empty($URI_PARTS["port"]))
                  $this->port = $URI_PARTS["port"];
              $this->_httpsrequest($this->_isproxy ? $URI : $path, $URI, $this->_submit_method, $this->_submit_type, $postdata);
              break;

          default:
              // not a valid protocol
              $this->error = 'Invalid protocol "'.$scheme.'"'."\n";
              return false;
      }

      /* url was redirected, check if we've hit the max depth */
      if ($this->_redirectaddr && $this->maxredirs > $this->_redirectdepth)
      {
          // a redirect that does not start with our own scheme is qualified first
          if (!preg_match("|^".preg_quote($scheme, "|")."://|", $this->_redirectaddr))
              $this->_redirectaddr = $this->_expandlinks($this->_redirectaddr, $scheme."://".$host);

          // Same-site test: http or https, and the host must end at a URL
          // delimiter so "host.evil.tld" is not mistaken for "host".
          $samesite = preg_match(
              "~^https?://".preg_quote($this->host, "~")."(?:[:/?#]|$)~i",
              $this->_redirectaddr
          );

          // only follow redirect if it's on this site, or offsiteok is true
          if ($samesite || $this->offsiteok)
          {
              $this->_redirectdepth++;
              $this->lastredirectaddr = $this->_redirectaddr;
              if (strpos($this->_redirectaddr, "?") > 0)
                  $this->fetch($this->_redirectaddr); // the redirect has changed the request method from post to get
              else
                  $this->submit($this->_redirectaddr, $formvars, $formfiles);
          }
      }

      if ($this->_framedepth < $this->maxframes && count($this->_frameurls) > 0)
      {
          // work on a copy: the nested fetch() calls refill $this->_frameurls
          $frameurls = $this->_frameurls;
          $this->_frameurls = array();

          foreach ($frameurls as $frameurl)
          {
              if ($this->_framedepth >= $this->maxframes)
                  break;
              $this->fetch($frameurl);
              $this->_framedepth++;
          }
      }

      return true;
  }
  433.  
  434. /*======================================================================*\
  435. Function: fetchlinks
  436. Purpose: fetch the links from a web page
  437. Input: $URI where you are fetching from
  438. Output: $this->results an array of the URLs
  439. \*======================================================================*/
  440.  
  /**
   * Fetch a page and replace $this->results with the links found in it.
   *
   * When frames were followed $this->results is an array with one entry per
   * document; each entry is stripped and expanded on its own, the same way
   * submitlinks() does it, because _expandlinks() works on a string or a
   * flat array of strings and cannot take an array of link arrays.
   *
   * @param string $URI where you are fetching from
   * @return bool the result of fetch(); on true $this->results holds the links
   */
  function fetchlinks($URI)
  {
      if (!$this->fetch($URI))
          return false;

      // links are relative to the final location after redirects
      if ($this->lastredirectaddr)
          $URI = $this->lastredirectaddr;

      if (is_array($this->results))
      {
          for ($x = 0; $x < count($this->results); $x++)
          {
              $this->results[$x] = $this->_striplinks($this->results[$x]);
              if ($this->expandlinks)
                  $this->results[$x] = $this->_expandlinks($this->results[$x], $URI);
          }
      }
      else
      {
          $this->results = $this->_striplinks($this->results);
          if ($this->expandlinks)
              $this->results = $this->_expandlinks($this->results, $URI);
      }

      return true;
  }
  462.  
  463. /*======================================================================*\
  464. Function: fetchform
  465. Purpose: fetch the form elements from a web page
  466. Input: $URI where you are fetching from
  467. Output: $this->results the resulting html form
  468. \*======================================================================*/
  469.  
  /**
   * Fetch a page and reduce $this->results to its form elements.
   *
   * @param string $URI where you are fetching from
   * @return bool the result of fetch(); on true $this->results holds the
   *              output of _stripform() (per document when it is an array)
   */
  function fetchform($URI)
  {
      if (!$this->fetch($URI))
          return false;

      // single document: strip it in place and finish
      if (!is_array($this->results))
      {
          $this->results = $this->_stripform($this->results);
          return true;
      }

      // framed content: one entry per fetched document
      $index = 0;
      while ($index < count($this->results))
      {
          $this->results[$index] = $this->_stripform($this->results[$index]);
          $index++;
      }

      return true;
  }
  489.  
  490.  
  491. /*======================================================================*\
  492. Function: fetchtext
  493. Purpose: fetch the text from a web page, stripping the links
  494. Input: $URI where you are fetching from
  495. Output: $this->results the text from the web page
  496. \*======================================================================*/
  497.  
  /**
   * Fetch a page and reduce $this->results to plain text.
   *
   * @param string $URI where you are fetching from
   * @return bool the result of fetch(); on true $this->results holds the
   *              output of _striptext() (per document when it is an array)
   */
  function fetchtext($URI)
  {
      $fetched = $this->fetch($URI);
      if (!$fetched)
          return false;

      if (is_array($this->results))
      {
          // framed content: convert every fetched document separately
          for ($doc = 0; $doc < count($this->results); $doc++)
          {
              $plain = $this->_striptext($this->results[$doc]);
              $this->results[$doc] = $plain;
          }
      }
      else
      {
          $plain = $this->_striptext($this->results);
          $this->results = $plain;
      }

      return true;
  }
  514.  
  515. /*======================================================================*\
  516. Function: submitlinks
  517. Purpose: grab links from a form submission
  518. Input: $URI where you are submitting from
  519. Output: $this->results an array of the links from the post
  520. \*======================================================================*/
  521.  
  /**
   * Submit a form and replace $this->results with the links of the response.
   *
   * @param string $URI       where you are submitting from
   * @param mixed  $formvars  form variables, passed through to submit()
   * @param mixed  $formfiles form files, passed through to submit()
   * @return bool the result of submit(); on true $this->results holds the
   *              links (one list per document when frames were followed)
   */
  function submitlinks($URI, $formvars="", $formfiles="")
  {
      if (!$this->submit($URI, $formvars, $formfiles))
          return false;

      // after a redirect, links are relative to the final location
      $base = $this->lastredirectaddr ? $this->lastredirectaddr : $URI;

      if (!is_array($this->results))
      {
          $found = $this->_striplinks($this->results);
          $this->results = $this->expandlinks ? $this->_expandlinks($found, $base) : $found;
          return true;
      }

      for ($i = 0; $i < count($this->results); $i++)
      {
          $found = $this->_striplinks($this->results[$i]);
          $this->results[$i] = $this->expandlinks ? $this->_expandlinks($found, $base) : $found;
      }

      return true;
  }
  548.  
  549. /*======================================================================*\
  550. Function: submittext
  551. Purpose: grab text from a form submission
  552. Input: $URI where you are submitting from
  553. Output: $this->results the text from the web page
  554. \*======================================================================*/
  555.  
  /**
   * Submit a form and reduce the response in $this->results to plain text.
   *
   * When $this->expandlinks is set the stripped text is additionally run
   * through _expandlinks().
   *
   * @param string $URI       where you are submitting from
   * @param mixed  $formvars  form variables, passed through to submit()
   * @param mixed  $formfiles form files, passed through to submit()
   * @return bool the result of submit()
   */
  function submittext($URI, $formvars = "", $formfiles = "")
  {
      $submitted = $this->submit($URI, $formvars, $formfiles);
      if (!$submitted)
          return false;

      // after a redirect the final location is the base for expansion
      if ($this->lastredirectaddr)
          $URI = $this->lastredirectaddr;

      if (is_array($this->results))
      {
          $n = 0;
          while ($n < count($this->results))
          {
              $text = $this->_striptext($this->results[$n]);
              if ($this->expandlinks)
                  $text = $this->_expandlinks($text, $URI);
              $this->results[$n] = $text;
              $n++;
          }
      }
      else
      {
          $text = $this->_striptext($this->results);
          if ($this->expandlinks)
              $text = $this->_expandlinks($text, $URI);
          $this->results = $text;
      }

      return true;
  }
  582.  
  583.  
  584.  
  585. /*======================================================================*\
  586. Function: set_submit_multipart
  587. Purpose: Set the form submission content type to
  588. multipart/form-data
  589. \*======================================================================*/
  /**
   * Make subsequent submissions use the multipart/form-data content type.
   */
  function set_submit_multipart()
  {
      $type = "multipart/form-data";
      $this->_submit_type = $type;
  }
  594.  
  595.  
  596. /*======================================================================*\
  597. Function: set_submit_normal
  598. Purpose: Set the form submission content type to
  599. application/x-www-form-urlencoded
  600. \*======================================================================*/
  /**
   * Make subsequent submissions use the default url-encoded content type.
   */
  function set_submit_normal()
  {
      $type = "application/x-www-form-urlencoded";
      $this->_submit_type = $type;
  }
  605.  
  606.  
  607.  
  608.  
  609. /*======================================================================*\
  610. Private functions
  611. \*======================================================================*/
  612.  
  613.  
  614. /*======================================================================*\
  615. Function: _striplinks
  616. Purpose: strip the hyperlinks from an html document
  617. Input: $document document to strip.
  618. Output: $match an array of the links
  619. \*======================================================================*/
  620.  
  /**
   * Extract the href targets of all <a> tags in an html document.
   *
   * Quoted targets are collected first, then unquoted ones, so the result
   * is grouped by quoting style rather than strictly in document order.
   *
   * @param string $document document to strip
   * @return array the links found; an empty array when there are none
   */
  function _striplinks($document)
  {
      preg_match_all("'<\s*a\s.*?href\s*=\s*        # find <a href=
                      ([\"\'])?                     # find single or double quote
                      (?(1) (.*?)\\1 | ([^\s\>]+))  # if quote found, match up to next matching
                                                    # quote, otherwise match up to next space
                      'isx", $document, $links);

      // always hand back an array, even when nothing matched
      $match = array();

      // catenate the non-empty matches from the conditional subpattern:
      // group 2 holds quoted targets, group 3 unquoted ones
      foreach (array(2, 3) as $group)
      {
          if (empty($links[$group]))
              continue;

          foreach ($links[$group] as $val)
          {
              if (!empty($val))
                  $match[] = $val;
          }
      }

      // return the links
      return $match;
  }
  647.  
  648. /*======================================================================*\
  649. Function: _stripform
  650. Purpose: strip the form elements from an html document
  651. Input: $document document to strip.
  Output: $match the matched form elements as one string, joined by CRLF
  653. \*======================================================================*/
  654.  
  /**
   * Reduce an html document to its form-related tags.
   *
   * @param string $document document to strip
   * @return string every match of the form-element pattern, joined by "\r\n"
   */
  function _stripform($document)
  {
      $pattern = "'<\/?(FORM|INPUT|SELECT|TEXTAREA|(OPTION))[^<>]*>(?(2)(.*(?=<\/?(option|select)[^<>]*>[\r\n]*)|(?=[\r\n]*))|(?=[\r\n]*))'Usi";

      $found = array();
      preg_match_all($pattern, $document, $found);

      // index 0 holds the full text of each match
      return implode("\r\n", $found[0]);
  }
  665.  
  666.  
  667.  
  668. /*======================================================================*\
  669. Function: _striptext
  670. Purpose: strip the text from an html document
  671. Input: $document document to strip.
  672. Output: $text the resulting text
  673. \*======================================================================*/
  674.  
  /**
   * Turn an html document into plain text.
   *
   * Scripts and tags are removed, whitespace following a line break is
   * collapsed, and a fixed list of entities is replaced. The rules run in
   * the order listed, each on the output of the previous one.
   *
   * @param string $document document to strip
   * @return string the resulting text
   */
  function _striptext($document)
  {
      // pattern => replacement, applied top to bottom
      $rules = array(
          "'<script[^>]*?>.*?</script>'si" => "",       // strip out javascript
          "'<[\/\!]*?[^<>]*?>'si"          => "",       // strip out html tags
          "'([\r\n])[\s]+'"                => "\\1",    // strip out white space
          "'&(quot|#34|#034|#x22);'i"      => "\"",     // replace html entities
          "'&(amp|#38|#038|#x26);'i"       => "&",      // hexadecimal values included
          "'&(lt|#60|#060|#x3c);'i"        => "<",
          "'&(gt|#62|#062|#x3e);'i"        => ">",
          "'&(nbsp|#160|#xa0);'i"          => " ",
          "'&(iexcl|#161);'i"              => chr(161),
          "'&(cent|#162);'i"               => chr(162),
          "'&(pound|#163);'i"              => chr(163),
          "'&(copy|#169);'i"               => chr(169),
          "'&(reg|#174);'i"                => chr(174),
          "'&(deg|#176);'i"                => chr(176),
          "'&(#39|#039|#x27);'"            => chr(39),
          "'&(euro|#8364);'i"              => chr(128), // europe
          "'&a(uml|UML);'"                 => "ä",      // german
          "'&o(uml|UML);'"                 => "ö",
          "'&u(uml|UML);'"                 => "ü",
          "'&A(uml|UML);'"                 => "Ä",
          "'&O(uml|UML);'"                 => "Ö",
          "'&U(uml|UML);'"                 => "Ü",
          "'&szlig;'i"                     => "ß",
      );

      return preg_replace(array_keys($rules), array_values($rules), $document);
  }
  735.  
  736. /*======================================================================*\
  737. Function: _expandlinks
  738. Purpose: expand each link into a fully qualified URL
  739. Input: $links the links to qualify
  740. $URI the full URI to get the base from
  741. Output: $expandedLinks the expanded links
  742. \*======================================================================*/
  743.  
  /**
   * Expand each link into a fully qualified URL.
   *
   * The base is $URI without its query string, without a trailing
   * "name.ext" path segment and without a trailing slash. Links starting
   * with "/" are resolved against scheme://host of the base, other links
   * that are not http(s):// or mailto: are resolved against the base itself.
   *
   * @param string|array $links the link, or flat array of links, to qualify
   * @param string       $URI   the full URI to get the base from
   * @return string|array the expanded link(s), same shape as $links
   */
  function _expandlinks($links,$URI)
  {
      // drop the query string
      $base = preg_match("/^[^\?]+/", $URI, $match) ? $match[0] : "";

      // Split off "scheme://authority" so the file-name stripping below only
      // ever sees the path; otherwise a base such as "http://example.com"
      // would lose its host because "/example.com" looks like "/name.ext".
      if (preg_match("|^([a-z][a-z0-9+.\-]*://[^/]*)(.*)$|is", $base, $split))
      {
          $authority = $split[1];
          $path = $split[2];
      }
      else
      {
          $authority = "";
          $path = $base;
      }

      $path = preg_replace("|/[^\/\.]+\.[^\/\.]+$|", "", $path); // trailing file name
      $path = preg_replace("|/$|", "", $path);                   // trailing slash
      $base = $authority.$path;

      $base_part = parse_url($base);
      $base_root =
          (isset($base_part["scheme"]) ? $base_part["scheme"] : "")."://".
          (isset($base_part["host"]) ? $base_part["host"] : "");

      $search = array( "|^http://".preg_quote($this->host, "|")."|i", // strip our own host ...
                       "|^(\/)|i",                                    // ... then root-relative links
                       "|^(?!https?://)(?!mailto:)|i",                // remaining relative links
                       "|/\./|",                                      // collapse "/./"
                       "|/[^\/]+/\.\./|"                              // collapse "/dir/../"
                      );

      $replace = array( "",
                        $base_root."/",
                        $base."/",
                        "/",
                        "/"
                      );

      $expandedLinks = preg_replace($search, $replace, $links);

      return $expandedLinks;
  }
  773.  
  774. /*======================================================================*\
  775. Function: _httprequest
  776. Purpose: go get the http data from the server
  777. Input: $url the url to fetch
  778. $fp the current open file pointer
  779. $URI the full URI
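  $http_method the request method to send (e.g. GET or POST)
  $content_type value for the Content-type header, if any
  $body body contents to send if any (POST)
  Output: $this->results, $this->headers, $this->response_code and
  $this->status are filled in; returns false when a read times out,
  true otherwise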
  782. \*======================================================================*/
  783.  
  /**
   * Send one http request over an open socket and read the response.
   *
   * Fills $this->headers (raw header lines), $this->response_code (raw
   * status line), $this->status and $this->results. Sets
   * $this->_redirectaddr when the response carries a Location:/URI: header
   * or a meta refresh tag, and queues frame sources in $this->_frameurls
   * while the frame depth is below $this->maxframes.
   *
   * @param string   $url          request target for the request line ("/" when empty)
   * @param resource $fp           the current open file pointer
   * @param string   $URI          the full URI
   * @param string   $http_method  request method to send
   * @param string   $content_type value of the Content-type header, if any
   * @param string   $body         body contents to send if any (POST)
   * @return bool false when a read timed out (status is then -100), true otherwise
   */
  function _httprequest($url,$fp,$URI,$http_method,$content_type="",$body="")
  {
      $cookie_headers = '';
      if ($this->passcookies && $this->_redirectaddr)
          $this->setcookies();

      $URI_PARTS = parse_url($URI);
      if (empty($url))
          $url = "/";

      $headers = $http_method." ".$url." ".$this->_httpversion."\r\n";
      if (!empty($this->agent))
          $headers .= "User-Agent: ".$this->agent."\r\n";
      // a Host entry in rawheaders takes precedence over the generated one
      if (!empty($this->host) && !isset($this->rawheaders['Host']))
      {
          $headers .= "Host: ".$this->host;
          if (!empty($this->port))
              $headers .= ":".$this->port;
          $headers .= "\r\n";
      }
      if (!empty($this->accept))
          $headers .= "Accept: ".$this->accept."\r\n";
      if (!empty($this->referer))
          $headers .= "Referer: ".$this->referer."\r\n";
      if (!empty($this->cookies))
      {
          if (!is_array($this->cookies))
              $this->cookies = (array)$this->cookies;

          $cookie_headers .= 'Cookie: ';
          foreach ($this->cookies as $cookieKey => $cookieVal)
              $cookie_headers .= $cookieKey."=".urlencode($cookieVal)."; ";
          // cut the trailing "; "
          $headers .= substr($cookie_headers, 0, -2)."\r\n";
      }
      if (!empty($this->rawheaders))
      {
          if (!is_array($this->rawheaders))
              $this->rawheaders = (array)$this->rawheaders;
          // foreach always starts at the first entry, so the raw headers
          // are also sent on follow-up requests (redirects, frames)
          foreach ($this->rawheaders as $headerKey => $headerVal)
              $headers .= $headerKey.": ".$headerVal."\r\n";
      }
      if (!empty($content_type))
      {
          $headers .= "Content-type: $content_type";
          if ($content_type == "multipart/form-data")
              $headers .= "; boundary=".$this->_mime_boundary;
          $headers .= "\r\n";
      }
      if (!empty($body))
          $headers .= "Content-length: ".strlen($body)."\r\n";
      if (!empty($this->user) || !empty($this->pass))
          $headers .= "Authorization: Basic ".base64_encode($this->user.":".$this->pass)."\r\n";

      //add proxy auth headers
      if (!empty($this->proxy_user))
          $headers .= 'Proxy-Authorization: ' . 'Basic ' . base64_encode($this->proxy_user . ':' . $this->proxy_pass)."\r\n";

      $headers .= "\r\n";

      // set the read timeout if needed
      if ($this->read_timeout > 0)
          socket_set_timeout($fp, $this->read_timeout);
      $this->timed_out = false;

      fwrite($fp, $headers.$body, strlen($headers.$body));

      $this->_redirectaddr = false;
      // reset instead of unset so the property stays an array even when
      // the server sends no header line at all
      $this->headers = array();

      while ($currentHeader = fgets($fp, $this->_maxlinelen))
      {
          if ($this->read_timeout > 0 && $this->_check_timeout($fp))
          {
              $this->status = -100;
              return false;
          }

          // blank line: end of the header section
          if ($currentHeader == "\r\n")
              break;

          // if a header begins with Location: or URI:, set the redirect;
          // whitespace after the colon is optional
          if (preg_match("/^(Location:|URI:)[ \t]*(.*)/i", chop($currentHeader), $matches))
          {
              // look for :// in the Location header to see if hostname is included
              if (!preg_match("|\:\/\/|", $matches[2]))
              {
                  // no host in the path, so prepend
                  $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                  // eliminate double slash
                  if (!preg_match("|^/|", $matches[2]))
                      $this->_redirectaddr .= "/".$matches[2];
                  else
                      $this->_redirectaddr .= $matches[2];
              }
              else
                  $this->_redirectaddr = $matches[2];
          }

          if (preg_match("|^HTTP/|", $currentHeader))
          {
              if (preg_match("|^HTTP/[^\s]*\s(.*?)\s|", $currentHeader, $status))
              {
                  $this->status = $status[1];
              }
              $this->response_code = $currentHeader;
          }

          $this->headers[] = $currentHeader;
      }

      // read the body in chunks of $this->maxlength until nothing is left
      $results = '';
      while (true)
      {
          $_data = fread($fp, $this->maxlength);
          if ($_data === false || strlen($_data) == 0)
              break;
          $results .= $_data;
      }

      if ($this->read_timeout > 0 && $this->_check_timeout($fp))
      {
          $this->status = -100;
          return false;
      }

      // check if there is a a redirect meta tag
      if (preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i", $results, $match))
      {
          $this->_redirectaddr = $this->_expandlinks($match[1], $URI);
      }

      // have we hit our frame depth and is there frame src to fetch?
      if (($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i", $results, $match))
      {
          // $this->results starts out as a string; appending with [] to a
          // string is an error, so switch to an array first
          if (!is_array($this->results))
              $this->results = array();
          $this->results[] = $results;
          for ($x = 0; $x < count($match[1]); $x++)
              $this->_frameurls[] = $this->_expandlinks($match[1][$x], $URI_PARTS["scheme"]."://".$this->host);
      }
      // have we already fetched framed content?
      elseif (is_array($this->results))
          $this->results[] = $results;
      // no framed content
      else
          $this->results = $results;

      return true;
  }
  937.  
  938. /*======================================================================*\
  939. Function: _httpsrequest
  940. Purpose: go get the https data from the server using curl
  941. Input: $url the url to fetch
  942. $URI the full URI
  943. $body body contents to send if any (POST)
  944. Output:
  945. \*======================================================================*/
  946.  
  /**
   * Fetch an https document by shelling out to the curl binary.
   *
   * Builds the request headers from the object's settings, runs
   * $this->curl_path with the response headers dumped into a temporary
   * file, then stores the response headers, response code, status,
   * redirect target, frame URLs and document body on the object.
   *
   * Every value placed on the command line is passed through
   * escapeshellarg() so that header, body or URI content cannot be
   * interpreted by the shell.
   * NOTE(review): on Windows escapeshellarg() replaces '%', '!' and '"'
   * with spaces, which alters url-encoded values - confirm if Windows
   * hosts must be supported.
   *
   * @param string $url          path portion of the URI; not used here because
   *                             curl builds the request line itself
   * @param string $URI          the full URI to fetch
   * @param string $http_method  request method; not forwarded to curl
   * @param string $content_type content type of $body, if any
   * @param string $body         body contents to send, if any (POST)
   * @return bool true on success; false when curl could not be run or
   *              reported an error (message in $this->error)
   */
  function _httpsrequest($url,$URI,$http_method,$content_type="",$body="")
  {
      if($this->passcookies && $this->_redirectaddr)
          $this->setcookies();

      $headers = array();

      $URI_PARTS = parse_url($URI);

      // the "GET <url> HTTP/x" request line is not needed for curl,
      // so $url and $http_method are intentionally left unused
      if(!empty($this->agent))
          $headers[] = "User-Agent: ".$this->agent;
      if(!empty($this->host))
      {
          if(!empty($this->port))
              $headers[] = "Host: ".$this->host.":".$this->port;
          else
              $headers[] = "Host: ".$this->host;
      }
      if(!empty($this->accept))
          $headers[] = "Accept: ".$this->accept;
      if(!empty($this->referer))
          $headers[] = "Referer: ".$this->referer;
      if(!empty($this->cookies))
      {
          if(!is_array($this->cookies))
              $this->cookies = (array)$this->cookies;

          $cookie_str = 'Cookie: ';
          foreach($this->cookies as $cookieKey => $cookieVal)
              $cookie_str .= $cookieKey."=".urlencode($cookieVal)."; ";
          // drop the trailing "; "
          $headers[] = substr($cookie_str,0,-2);
      }
      if(!empty($this->rawheaders))
      {
          if(!is_array($this->rawheaders))
              $this->rawheaders = (array)$this->rawheaders;
          // foreach instead of each(): each() no longer exists in PHP 8
          foreach($this->rawheaders as $headerKey => $headerVal)
              $headers[] = $headerKey.": ".$headerVal;
      }
      if(!empty($content_type))
      {
          if($content_type == "multipart/form-data")
              $headers[] = "Content-type: $content_type; boundary=".$this->_mime_boundary;
          else
              $headers[] = "Content-type: $content_type";
      }
      if(!empty($body))
          $headers[] = "Content-length: ".strlen($body);
      if(!empty($this->user) || !empty($this->pass))
          $headers[] = "Authorization: BASIC ".base64_encode($this->user.":".$this->pass);

      // build the curl argument list; each argument is shell-escaped
      $cmdline_params = "";
      foreach($headers as $header)
          $cmdline_params .= " -H ".escapeshellarg($header);

      if(!empty($body))
          $cmdline_params .= " -d ".escapeshellarg($body);

      if($this->read_timeout > 0)
          $cmdline_params .= " -m ".escapeshellarg((string)$this->read_timeout);

      // prefer a configured temp_dir property when the object has one,
      // otherwise fall back to the system temporary directory
      if(!empty($this->temp_dir))
          $temp_dir = $this->temp_dir;
      elseif(function_exists("sys_get_temp_dir"))
          $temp_dir = sys_get_temp_dir();
      else
          $temp_dir = "";

      $headerfile = tempnam($temp_dir, "sno");
      if($headerfile === false)
      {
          $this->error = "Error: could not create a temporary file for the response headers.";
          return false;
      }

      $results = array();
      $return = 0;
      exec($this->curl_path." -D ".escapeshellarg($headerfile).$cmdline_params." ".escapeshellarg($URI),$results,$return);

      if($return)
      {
          // do not leave the temporary header file behind on failure
          if(file_exists($headerfile))
              unlink($headerfile);
          $this->error = "Error: cURL could not retrieve the document, error $return.";
          return false;
      }

      $results = implode("\r\n",$results);

      $result_headers = file($headerfile);
      unlink($headerfile);
      if(!is_array($result_headers))
          $result_headers = array();

      $this->_redirectaddr = false;
      // reset to an empty array (not unset) so later count()/foreach stay valid
      $this->headers = array();

      foreach($result_headers as $header_line)
      {
          // if a header begins with Location: or URI:, set the redirect;
          // one case-insensitive pattern both detects the header and
          // extracts its value
          if(preg_match("/^(?:Location|URI):\s*(.*)$/i",chop($header_line),$matches))
          {
              $location = $matches[1];
              // look for :// in the value to see if the hostname is included
              if(!preg_match("|\:\/\/|",$location))
              {
                  // no host in the path, so prepend
                  $this->_redirectaddr = $URI_PARTS["scheme"]."://".$this->host.":".$this->port;
                  // eliminate double slash
                  if(!preg_match("|^/|",$location))
                      $this->_redirectaddr .= "/".$location;
                  else
                      $this->_redirectaddr .= $location;
              }
              else
                  $this->_redirectaddr = $location;
          }

          if(preg_match("|^HTTP/|",$header_line))
          {
              // record the numeric status alongside the raw status line
              if(preg_match("|^HTTP/[^\s]*\s(.*?)\s|",$header_line,$status))
                  $this->status = $status[1];
              $this->response_code = $header_line;
          }

          $this->headers[] = $header_line;
      }

      // check if there is a redirect meta tag
      if(preg_match("'<meta[\s]*http-equiv[^>]*?content[\s]*=[\s]*[\"\']?\d+;[\s]*URL[\s]*=[\s]*([^\"\']*?)[\"\']?>'i",$results,$match))
      {
          $this->_redirectaddr = $this->_expandlinks($match[1],$URI);
      }

      // have we hit our frame depth and is there frame src to fetch?
      if(($this->_framedepth < $this->maxframes) && preg_match_all("'<frame\s+.*src[\s]*=[\'\"]?([^\'\"\>]+)'i",$results,$match))
      {
          $this->results[] = $results;
          for($x=0; $x<count($match[1]); $x++)
              $this->_frameurls[] = $this->_expandlinks($match[1][$x],$URI_PARTS["scheme"]."://".$this->host);
      }
      // have we already fetched framed content?
      elseif(is_array($this->results))
          $this->results[] = $results;
      // no framed content
      else
          $this->results = $results;

      return true;
  }
  1086.  
  1087. /*======================================================================*\
  1088. Function: setcookies()
  1089. Purpose: set cookies for a redirection
  1090. \*======================================================================*/
  1091.  
  /**
   * Copy cookies from the stored response headers into $this->cookies
   * so they are sent along with a following (redirected) request.
   *
   * Only the name=value pair of each Set-Cookie header is kept; cookie
   * attributes after the first ';' are ignored. Values are url-decoded.
   *
   * @return void
   */
  function setcookies()
  {
      // nothing to do when no headers have been collected yet
      if(empty($this->headers) || !is_array($this->headers))
          return;

      foreach($this->headers as $header)
      {
          // headers are stored with their line terminator, so exclude
          // CR/LF from the value as well as the attribute separator
          if(preg_match('/^set-cookie:[\s]+([^=]+)=([^;\r\n]+)/i', $header, $match))
              $this->cookies[$match[1]] = urldecode($match[2]);
      }
  }
  1100.  
  1101.  
  1102. /*======================================================================*\
  1103. Function: _check_timeout
  1104. Purpose: checks whether timeout has occurred
  1105. Input: $fp file pointer
  1106. \*======================================================================*/
  1107.  
  /**
   * Report whether the stream has timed out.
   *
   * The check is only performed when $this->read_timeout is greater
   * than zero. On a timeout $this->timed_out is set to true.
   *
   * @param resource $fp file pointer to inspect
   * @return bool true if the stream reports a timeout, false otherwise
   */
  function _check_timeout($fp)
  {
      // read timeouts disabled: never report a timeout
      if (!($this->read_timeout > 0))
          return false;

      $stream_info = socket_get_status($fp);
      if (!$stream_info["timed_out"])
          return false;

      $this->timed_out = true;
      return true;
  }
  1119.  
  1120. /*======================================================================*\
  1121. Function: _connect
  1122. Purpose: make a socket connection
  1123. Input: $fp file pointer
  1124. \*======================================================================*/
  1125.  
  /**
   * Open a socket connection to the target host, or to the proxy when
   * both proxy_host and proxy_port are set.
   *
   * On failure $this->status receives the error number reported by
   * fsockopen() and $this->error a matching message.
   *
   * @param resource $fp passed by reference; receives the opened socket
   * @return bool true if the connection succeeded, false otherwise
   */
  function _connect(&$fp)
  {
      if(!empty($this->proxy_host) && !empty($this->proxy_port))
      {
          $this->_isproxy = true;

          $host = $this->proxy_host;
          $port = $this->proxy_port;
      }
      else
      {
          $host = $this->host;
          $port = $this->port;
      }

      $this->status = 0;

      $fp = fsockopen($host, $port, $errno, $errstr, $this->_fp_timeout);
      if($fp)
      {
          // socket connection succeeded
          return true;
      }

      // socket connection failed
      $this->status = $errno;
      switch($errno)
      {
          // each case must break, otherwise the default message
          // overwrites the specific one
          case -3:
              $this->error="socket creation failed (-3)";
              break;
          case -4:
              $this->error="dns lookup failure (-4)";
              break;
          case -5:
              $this->error="connection refused or timed out (-5)";
              break;
          default:
              $this->error="connection failed (".$errno.")";
              break;
      }
      return false;
  }
  1173. /*======================================================================*\
  1174. Function: _disconnect
  1175. Purpose: disconnect a socket connection
  1176. Input: $fp file pointer
  1177. \*======================================================================*/
  1178.  
  /**
   * Close a previously opened connection.
   *
   * @param resource $fp file pointer to close
   * @return bool the result of fclose() on $fp
   */
  function _disconnect($fp)
  {
      $closed = fclose($fp);
      return $closed;
  }
  1183.  
  1184.  
  1185. /*======================================================================*\
  1186. Function: _prepare_post_body
  1187. Purpose: Prepare post body according to encoding type
  1188. Input: $formvars - form variables
  1189. $formfiles - form upload files
  1190. Output: post body
  1191. \*======================================================================*/
  1192.  
  /**
   * Build the POST body for the current $this->_submit_type.
   *
   * For "application/x-www-form-urlencoded" the variables are url-encoded
   * and joined with '&'. For "multipart/form-data" a fresh boundary is
   * stored in $this->_mime_boundary and each variable and each readable
   * file becomes one part. Array or object values are submitted under
   * the name "key[]", once per element. Files that are not readable are
   * skipped. Any other submit type yields an empty string.
   *
   * @param mixed $formvars  form variables (name => value or list of values)
   * @param mixed $formfiles form upload files (field name => path or list of paths)
   * @return string the post body; empty when there is nothing to send
   */
  function _prepare_post_body($formvars, $formfiles)
  {
      settype($formvars, "array");
      settype($formfiles, "array");
      $postdata = '';

      // always hand back a string so callers can safely strlen()/empty() it
      if (count($formvars) == 0 && count($formfiles) == 0)
          return $postdata;

      switch ($this->_submit_type) {
          case "application/x-www-form-urlencoded":
              // foreach instead of each(): each() no longer exists in PHP 8
              foreach ($formvars as $key => $val) {
                  if (is_array($val) || is_object($val)) {
                      foreach ($val as $cur_val) {
                          $postdata .= urlencode($key)."[]=".urlencode($cur_val)."&";
                      }
                  } else
                      $postdata .= urlencode($key)."=".urlencode($val)."&";
              }
              break;

          case "multipart/form-data":
              $this->_mime_boundary = "Snoopy".md5(uniqid(microtime()));

              foreach ($formvars as $key => $val) {
                  if (is_array($val) || is_object($val)) {
                      foreach ($val as $cur_val) {
                          $postdata .= "--".$this->_mime_boundary."\r\n";
                          // concatenate the brackets: "\[" inside a double-quoted
                          // string would keep the backslash in the field name
                          $postdata .= "Content-Disposition: form-data; name=\"".$key."[]\"\r\n\r\n";
                          $postdata .= "$cur_val\r\n";
                      }
                  } else {
                      $postdata .= "--".$this->_mime_boundary."\r\n";
                      $postdata .= "Content-Disposition: form-data; name=\"$key\"\r\n\r\n";
                      $postdata .= "$val\r\n";
                  }
              }

              foreach ($formfiles as $field_name => $file_names) {
                  settype($file_names, "array");
                  foreach ($file_names as $file_name) {
                      if (!is_readable($file_name)) continue;

                      // binary-safe read that also copes with empty files
                      $file_content = file_get_contents($file_name);
                      if ($file_content === false) continue;
                      $base_name = basename($file_name);

                      $postdata .= "--".$this->_mime_boundary."\r\n";
                      $postdata .= "Content-Disposition: form-data; name=\"$field_name\"; filename=\"$base_name\"\r\n\r\n";
                      $postdata .= "$file_content\r\n";
                  }
              }
              $postdata .= "--".$this->_mime_boundary."--\r\n";
              break;
      }

      return $postdata;
  }
  1255. }
  1256.  
  1257. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement