Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
- require "./vendor/autoload.php";
// User-Agent header value sent with the HTTP request: identifies the client
// as Firefox 47 on 64-bit Windows.
define(
    "USER_AGENT_STRING_MOZ47",
    "Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0"
);
/**
 * Extracts the anchors (<a> elements) found in an HTML document.
 *
 * Receives HTML source code and returns an array, indexed by URL (the value
 * of the "href" attribute), of the anchor texts found, e.g.:
 *
 *   [
 *       "https://site.com/blabla" => "The best potatoes",
 *       ...
 *       "http://other" => "another anchor"
 *   ]
 *
 * When the same href occurs more than once, only the text of the first
 * occurrence is kept. An <a> without an href attribute is recorded under the
 * empty-string key, because DOMElement::getAttribute() returns "" for a
 * missing attribute.
 *
 * @param string $pStrHtml HTML source code to parse
 * @return array<array-key, string> anchor text indexed by href; empty when
 *                                  the input is empty or cannot be parsed
 */
function extractAsFromHtml(
    string $pStrHtml
): array {
    $ret = [];

    // DOMDocument::loadHTML() refuses an empty string (ValueError on PHP 8),
    // which is what a failed download ends up as; there is nothing to parse.
    if ($pStrHtml === '') {
        return $ret;
    }

    $doc = new \DOMDocument();

    // Real-world HTML is rarely valid. Buffer libxml's parse errors instead
    // of silencing everything with "@", then restore the caller's setting.
    $bPreviousSetting = libxml_use_internal_errors(true);
    try {
        $bLoaded = $doc->loadHTML($pStrHtml);
    } finally {
        if (!$bPreviousSetting) {
            // Buffering was switched on only for this call: drop what it collected.
            libxml_clear_errors();
        }
        libxml_use_internal_errors($bPreviousSetting);
    }

    if (!$bLoaded) {
        return $ret;
    }

    foreach ($doc->getElementsByTagName('a') as $a) {
        $href = $a->getAttribute('href');
        // First occurrence wins: later anchors with the same href are ignored.
        if (!array_key_exists($href, $ret)) {
            $ret[$href] = $a->nodeValue;
        }
    }

    return $ret;
}//extractAsFromHtml
// Demo: download a Google results page and dump the anchors found in it.
$ch = curl_init("https://www.google.com/search?q=Alain+Prost");
if ($ch === false) {
    echo "Could not init the CURL object";
} else {
    // All options are applied in one call; it returns false as soon as one fails.
    $bConfigured = curl_setopt_array($ch, [
        CURLOPT_USERAGENT => USER_AGENT_STRING_MOZ47,
        CURLOPT_HTTPGET => true,        // GET request
        CURLOPT_RETURNTRANSFER => true, // return the response body instead of printing it
        CURLOPT_ENCODING => "",         // accept any encoding cURL supports and decode it automatically
        CURLOPT_VERBOSE => true,        // emit extra diagnostic information
        CURLOPT_FOLLOWLOCATION => true, // follow HTTP redirections
        CURLOPT_MAXREDIRS => 2,         // upper bound on the redirections followed
    ]);

    if (!$bConfigured) {
        echo "Could not configure the CURL object";
    } else {
        // With CURLOPT_RETURNTRANSFER enabled this is the body on success, false on failure.
        $dataAtSource = curl_exec($ch);
        if (is_string($dataAtSource)) {
            $as = extractAsFromHtml($dataAtSource);
            var_dump($as);
        } else {
            echo "CURL request failed: " . curl_error($ch);
        }
    }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement