Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
/**
 * Helper utilities for scraping remote pages.
 */
class Functions
{
    /** Seconds allowed for establishing a connection when probing an image. */
    const CONNECT_TIMEOUT_SECONDS = 5;

    /** Seconds allowed for one whole probe request, including redirects. */
    const REQUEST_TIMEOUT_SECONDS = 10;

    /**
     * Finds the image on a page whose server reports the largest byte size.
     *
     * The page is fetched and parsed as HTML, then every distinct, non-empty
     * <img src> value is probed with a body-less request and the
     * Content-Length values are compared. Images whose size is not reported
     * (or is zero) can never be selected.
     *
     * NOTE: src values are handed to cURL exactly as written in the markup
     * (only surrounding whitespace is trimmed); relative paths are not
     * resolved against $url, so they will normally fail the probe and be
     * skipped.
     *
     * @author Matt Cain from www.caincode.com (edited to fit purpose)
     *
     * @param string $url Location of the page to scan; read with file_get_contents().
     *
     * @return string|false The src of the largest image, or false when the page
     *                      could not be read or no image reported a positive size.
     */
    public function scrapeLargestImage($url)
    {
        $page = file_get_contents($url);
        // An empty string must not reach loadHTML(): newer PHP versions raise
        // an error for empty input.
        if ($page === false || $page === '') {
            return false;
        }

        // Real-world markup is rarely valid; buffer libxml's complaints instead
        // of emitting warnings, then discard them and put the caller's setting
        // back so this method has no lasting effect on libxml's global state.
        $previousErrorMode = libxml_use_internal_errors(true);
        $dom = new DOMDocument();
        $loaded = $dom->loadHTML($page);
        libxml_clear_errors();
        libxml_use_internal_errors($previousErrorMode);
        if ($loaded === false) {
            return false;
        }

        $visited = array();
        $maxLen = 0;
        $largest = '';
        foreach ($dom->getElementsByTagName('img') as $img) {
            $src = trim($img->getAttribute('src'));
            // Skip images without a src and ones that were already probed.
            if ($src === '' || isset($visited[$src])) {
                continue;
            }
            $visited[$src] = true;

            $contentLen = $this->probeContentLength($src);
            if ($contentLen > $maxLen) {
                $maxLen = $contentLen;
                $largest = $src;
            }
        }

        return $largest !== '' ? $largest : false;
    }

    /**
     * Asks the remote server how large a resource is without downloading it.
     *
     * @param string $src URL of the resource to probe.
     *
     * @return int Reported size in bytes, or 0 when the request failed or the
     *             server did not report a positive size.
     */
    private function probeContentLength($src)
    {
        $ch = curl_init($src);
        if ($ch === false) {
            return 0;
        }

        // Body-less request: only the response headers are needed.
        curl_setopt($ch, CURLOPT_NOBODY, true);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        // Treat HTTP error responses (>= 400) as failures so the size of an
        // error page is never mistaken for the size of an image.
        curl_setopt($ch, CURLOPT_FAILONERROR, true);
        // The src comes from untrusted markup: allow web protocols only, both
        // for the initial request and for any redirect target.
        curl_setopt($ch, CURLOPT_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
        curl_setopt($ch, CURLOPT_REDIR_PROTOCOLS, CURLPROTO_HTTP | CURLPROTO_HTTPS);
        // Without timeouts a single unresponsive host stalls the whole scan.
        curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, self::CONNECT_TIMEOUT_SECONDS);
        curl_setopt($ch, CURLOPT_TIMEOUT, self::REQUEST_TIMEOUT_SECONDS);

        $length = 0;
        if (curl_exec($ch) !== false) {
            // Ask cURL for the parsed length of the final response rather than
            // pattern-matching a raw header dump: header names are
            // case-insensitive, and when redirects are followed a dump holds
            // one set of headers per hop. cURL reports -1 when the length is
            // unknown.
            $reported = curl_getinfo($ch, CURLINFO_CONTENT_LENGTH_DOWNLOAD);
            if ($reported > 0) {
                $length = (int) $reported;
            }
        }
        curl_close($ch);

        return $length;
    }
}
- ?>
Advertisement
Add Comment
Please, Sign In to add comment