Advertisement
am_dot_com

CN20210324

Mar 24th, 2021 (edited)
127
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 2.11 KB | None | 0 0
  1. <?php
  2. require "./vendor/autoload.php";
  3.  
  // Firefox 47 user-agent string, used to present requests as coming from a browser.
  const USER_AGENT_STRING_MOZ47 =
      "Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0";
  8.  
  9.  
  /**
   * Extracts the anchors (<a> elements) found in HTML source code.
   *
   * The result is indexed by each anchor's href attribute and holds the
   * anchor's text content, e.g.
   * [
   *   "https://site.com/blabla" => "The best potatoes",
   *   ...
   *   "http://other" => "another anchor"
   * ]
   * When the same href occurs more than once, the text of the first
   * occurrence is kept. Anchors without an href attribute are collected
   * under the empty-string key.
   *
   * @param string $pStrHtml HTML source code to scan.
   * @return array<array-key, string> Anchor text keyed by href; empty when the
   *                                  input is empty or cannot be parsed.
   */
  function extractAsFromHtml(
      string $pStrHtml
  ): array {
      // DOMDocument::loadHTML() throws a ValueError on an empty string (PHP 8+),
      // and the "@" operator cannot silence an exception. An empty document has
      // no anchors anyway, so answer directly.
      if ($pStrHtml === '') {
          return [];
      }

      $document = new \DOMDocument();

      // Real-world HTML is rarely well-formed; collect libxml's parse warnings
      // internally instead of emitting them, then restore the caller's setting.
      $previousSetting = libxml_use_internal_errors(true);
      try {
          $loaded = $document->loadHTML($pStrHtml);
      } finally {
          libxml_clear_errors();
          libxml_use_internal_errors($previousSetting);
      }

      if (!$loaded) {
          return [];
      }

      // NOTE(review): loadHTML() falls back to ISO-8859-1 when the markup
      // declares no charset, so UTF-8 anchor text may come out garbled -
      // confirm against the pages being scraped.
      $links = [];
      foreach ($document->getElementsByTagName('a') as $anchorNode) {
          $href = $anchorNode->getAttribute('href');
          // First occurrence wins: later anchors with the same href are ignored.
          if (!array_key_exists($href, $links)) {
              $links[$href] = $anchorNode->nodeValue;
          }
      }

      return $links;
  }//extractAsFromHtml
  // Download a search results page and dump the anchors found in it.
  $ch = curl_init("https://www.google.com/search?q=Alain+Prost");
  if ($ch) {
      // Apply every option in one call; the result is false as soon as one
      // option cannot be set.
      $bConfigured = curl_setopt_array($ch, [
          CURLOPT_USERAGENT => USER_AGENT_STRING_MOZ47,

          CURLOPT_HTTPGET => true, // GET request
          CURLOPT_POST => false, // not a POST request

          CURLOPT_RETURNTRANSFER => true, // return the response body instead of printing it
          CURLOPT_ENCODING => "", // let curl negotiate the content encoding
          CURLOPT_VERBOSE => true, // extra diagnostic information

          CURLOPT_FOLLOWLOCATION => true, // curl follows HTTP redirections
          CURLOPT_MAXREDIRS => 2, // upper limit on redirections followed
      ]);

      if (!$bConfigured) {
          echo "Could not configure the CURL object";
      } else {
          // With CURLOPT_RETURNTRANSFER set, curl_exec() returns the body as a
          // string on success and false on failure.
          $dataAtSource = curl_exec($ch);

          if (is_string($dataAtSource)) {
              $as = extractAsFromHtml($dataAtSource);
              var_dump($as);
          } else {
              // Never hand a failed transfer to the HTML parser; report it instead.
              echo "CURL request failed: " . curl_error($ch);
          }
      }
  }
  else{
      echo "Could not init the CURL object";
  }
  65.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement