Advertisement
AbdulMuttaqin

Facebook Page Scraper

Nov 27th, 2019
1,405
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 2.04 KB | None | 0 0
  1. <?php
  2.  
  3. function fbPageScraper( $page_name = '' )  {
  4.     $post_url = "https://www.facebook.com/pg/{$page_name}/posts";
  5.    
  6.     $ch = curl_init($post_url);
  7.     curl_setopt($ch, CURLOPT_POST, false);
  8.     curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
  9.     curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (Windows; U; Windows NT 5.0; en-US; rv:1.7.12) Gecko/20050915 Firefox/1.0.7");
  10.     curl_setopt($ch, CURLOPT_HEADER, false);
  11.     curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
  12.     $contents = curl_exec($ch);
  13.    
  14.     $doc = new DOMDocument();
  15.     $doc->loadHTML($contents);
  16.     $xpath = new DOMXPath($doc);
  17.     $query = "//div[@data-testid='post_message']";
  18.     $entries = $xpath->query($query);
  19.    
  20.     $posts = array();
  21.     foreach ($entries as $entry) {
  22.         $innerHTML = '';
  23.         if ($entry->getElementsByTagName('p')->length) {
  24.             foreach ($entry->getElementsByTagName('p') as $child) {
  25.                 $innerHTML .= $child->ownerDocument->saveXML($child);
  26.             }
  27.         }
  28.    
  29.         $images = array();
  30.         if ($entry->nextSibling && $entry->nextSibling->getElementsByTagName('img')->length) {
  31.             foreach ($entry->nextSibling->getElementsByTagName('img') as $image) {
  32.                 $images[] = $image->getAttribute('src');
  33.             }
  34.         }
  35.    
  36.         $href = array();
  37.         if ($entry->nextSibling && $entry->nextSibling->getElementsByTagName('a')->length) {
  38.             foreach ($entry->nextSibling->getElementsByTagName('a') as $anchor) {
  39.                 if (strpos($anchor->getAttribute('href'), 'https://l.facebook.com') === false) {
  40.                     $href[] = 'https://www.facebook.com' . $anchor->getAttribute('href');
  41.                 } else {
  42.                     $href[] = $anchor->getAttribute('href');
  43.                 }
  44.    
  45.             }
  46.         }
  47.    
  48.         $posts[] = array(
  49.             'content' => $innerHTML,
  50.             'images' => count($images) ? $images[0] : '',
  51.             'href' => count($href) ? $href[0] : '',
  52.         );
  53.     }
  54.  
  55.     return $posts;
  56. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement