Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
// Announce the response as HTML encoded in UTF-8.
header('Content-type: text/html; charset=utf-8');
/**
 * Scrapes an Avas.mv article with Simple HTML DOM and prints its details.
 *
 * The article URL is read from $_GET['article']. The title, image URL, author,
 * publication date and the first two <p> elements of the page are echoed.
 * An error message is echoed instead when the URL is missing or is not an
 * absolute http(s) URL, or when the page cannot be loaded. Elements that are
 * missing from the page are printed as empty values.
 *
 * @return void
 */
function Avas()
{
    $article = (isset($_GET['article']) && is_string($_GET['article'])) ? trim($_GET['article']) : '';

    // The URL is caller-controlled: only absolute http(s) URLs are handed to
    // the loader, so local paths and other stream wrappers are rejected.
    $parts = parse_url($article);
    $scheme = (is_array($parts) && isset($parts['scheme'])) ? strtolower($parts['scheme']) : '';
    $host = (is_array($parts) && isset($parts['host'])) ? $parts['host'] : '';
    if (($scheme !== 'http' && $scheme !== 'https') || $host === '') {
        echo 'Error: a valid http(s) article URL is required.';
        return;
    }

    $html = file_get_html($article);
    if (!is_object($html)) {
        echo 'Error: the article could not be loaded.';
        return;
    }

    // Reads one property of the first element matching $selector ('' when nothing matches).
    $read = function ($selector, $property) use ($html) {
        $node = $html->find($selector, 0);
        return is_object($node) ? (string) $node->$property : '';
    };
    // Escapes scraped text for HTML output without double-encoding existing entities.
    $esc = function ($text) {
        return htmlspecialchars($text, ENT_QUOTES, 'UTF-8', false);
    };

    $title = $read("section[class='content'] h1", 'plaintext');
    $image = $read("meta[name='image']", 'content');
    $author = $read("div[class='post-author-details']", 'plaintext');
    $date = $read('time', 'datetime');

    // The first two paragraphs are concatenated in their string form, as in the
    // original code. NOTE(review): per Simple HTML DOM this is the elements'
    // markup, so the content is printed as HTML and deliberately not escaped.
    $content = $html->find('p', 0) . $html->find('p', 1);

    echo 'Title: ' . $esc($title) . ' <br>';
    echo 'Image: ' . $esc($image) . ' <br>';
    echo 'Author: ' . $esc($author) . ' <br>';
    echo 'Date Published: ' . $esc($date) . ' <br>';
    echo 'Content: ' . $content;
}
/**
 * Scrapes a Mihaaru.com article with Simple HTML DOM and prints its details.
 *
 * The article URL is read from $_GET['article']. The title, image URL, author,
 * publication date/time and the text of the first <article> element are echoed.
 * An error message is echoed instead when the URL is missing or is not an
 * absolute http(s) URL, or when the page cannot be loaded. Elements that are
 * missing from the page are printed as empty values.
 *
 * @return void
 */
function Mihaaru()
{
    $article = (isset($_GET['article']) && is_string($_GET['article'])) ? trim($_GET['article']) : '';

    // The URL is caller-controlled: only absolute http(s) URLs are handed to
    // the loader, so local paths and other stream wrappers are rejected.
    $parts = parse_url($article);
    $scheme = (is_array($parts) && isset($parts['scheme'])) ? strtolower($parts['scheme']) : '';
    $host = (is_array($parts) && isset($parts['host'])) ? $parts['host'] : '';
    if (($scheme !== 'http' && $scheme !== 'https') || $host === '') {
        echo 'Error: a valid http(s) article URL is required.';
        return;
    }

    $html = file_get_html($article);
    if (!is_object($html)) {
        echo 'Error: the article could not be loaded.';
        return;
    }

    // Reads one property of the first element matching $selector ('' when nothing matches).
    $read = function ($selector, $property) use ($html) {
        $node = $html->find($selector, 0);
        return is_object($node) ? (string) $node->$property : '';
    };
    // Escapes scraped text for HTML output without double-encoding existing entities.
    $esc = function ($text) {
        return htmlspecialchars($text, ENT_QUOTES, 'UTF-8', false);
    };

    $title = $read('h1', 'plaintext');
    $image = $read("img[data-index='1']", 'src');
    $author = $read('address', 'plaintext');
    $date = $read("span[class='date-time']", 'plaintext');
    $content = $read('article', 'plaintext');

    echo 'Title: ' . $esc($title) . ' <br>';
    echo 'Image: ' . $esc($image) . ' <br>';
    echo 'Author: ' . $esc($author) . ' <br>';
    echo 'Date Published: ' . $esc($date) . ' <br>';
    echo 'Content: ' . $esc($content);
}
/**
 * Placeholder for the Sun.mv scraper.
 *
 * No scraping is performed; a fixed notice that the site is down is printed.
 *
 * @return void
 */
function Sun()
{
    $notice = 'Sun.mv is down at the moment';
    print $notice;
}
/**
 * Scrapes a Vaguthu.mv article with Simple HTML DOM and prints its details.
 *
 * The article URL is read from $_GET['article']. The title, image URL, author,
 * publication date/time and the article text are echoed; the literal marker
 * "ADVERTISEMENT" is stripped from the article text before printing.
 * An error message is echoed instead when the URL is missing or is not an
 * absolute http(s) URL, or when the page cannot be loaded. Elements that are
 * missing from the page are printed as empty values.
 *
 * @return void
 */
function Vaguthu()
{
    $article = (isset($_GET['article']) && is_string($_GET['article'])) ? trim($_GET['article']) : '';

    // The URL is caller-controlled: only absolute http(s) URLs are handed to
    // the loader, so local paths and other stream wrappers are rejected.
    $parts = parse_url($article);
    $scheme = (is_array($parts) && isset($parts['scheme'])) ? strtolower($parts['scheme']) : '';
    $host = (is_array($parts) && isset($parts['host'])) ? $parts['host'] : '';
    if (($scheme !== 'http' && $scheme !== 'https') || $host === '') {
        echo 'Error: a valid http(s) article URL is required.';
        return;
    }

    $html = file_get_html($article);
    if (!is_object($html)) {
        echo 'Error: the article could not be loaded.';
        return;
    }

    // Reads one property of the first element matching $selector ('' when nothing matches).
    $read = function ($selector, $property) use ($html) {
        $node = $html->find($selector, 0);
        return is_object($node) ? (string) $node->$property : '';
    };
    // Escapes scraped text for HTML output without double-encoding existing entities.
    $esc = function ($text) {
        return htmlspecialchars($text, ENT_QUOTES, 'UTF-8', false);
    };

    $title = $read('h1', 'plaintext');
    $image = $read("meta[property='og:image']", 'content');
    $author = $read("span[class='single-article--meta-author']", 'plaintext');
    $date = $read('time', 'datetime');
    $content = $read("div[class='single-article--content']", 'plaintext');

    echo 'Title: ' . $esc($title) . ' <br>';
    echo 'Image: ' . $esc($image) . ' <br>';
    echo 'Author: ' . $esc($author) . ' <br>';
    echo 'Date Published: ' . $esc($date) . ' <br>';
    // Some articles embed an "ADVERTISEMENT" marker in the body text; drop it.
    echo 'Content: ' . $esc(str_replace('ADVERTISEMENT', '', $content));
}
/**
 * Placeholder for the Dhuvas.mv scraper.
 *
 * No scraping is performed; a fixed notice that the site is down is printed.
 *
 * @return void
 */
function Dhuvas()
{
    $notice = 'Dhuvas is down at the moment';
    print $notice;
}
/**
 * Scrapes an AdduLive.com article with Simple HTML DOM and prints its details.
 *
 * The article URL is read from $_GET['article']. The title, image URL, author,
 * publication date/time and the article text are echoed; the social-sharing
 * widget text is stripped from the article text before printing.
 * An error message is echoed instead when the URL is missing or is not an
 * absolute http(s) URL, or when the page cannot be loaded. Elements that are
 * missing from the page are printed as empty values.
 *
 * @return void
 */
function AdduLive()
{
    $article = (isset($_GET['article']) && is_string($_GET['article'])) ? trim($_GET['article']) : '';

    // The URL is caller-controlled: only absolute http(s) URLs are handed to
    // the loader, so local paths and other stream wrappers are rejected.
    $parts = parse_url($article);
    $scheme = (is_array($parts) && isset($parts['scheme'])) ? strtolower($parts['scheme']) : '';
    $host = (is_array($parts) && isset($parts['host'])) ? $parts['host'] : '';
    if (($scheme !== 'http' && $scheme !== 'https') || $host === '') {
        echo 'Error: a valid http(s) article URL is required.';
        return;
    }

    $html = file_get_html($article);
    if (!is_object($html)) {
        echo 'Error: the article could not be loaded.';
        return;
    }

    // Reads one property of the first element matching $selector ('' when nothing matches).
    $read = function ($selector, $property) use ($html) {
        $node = $html->find($selector, 0);
        return is_object($node) ? (string) $node->$property : '';
    };
    // Escapes scraped text for HTML output without double-encoding existing entities.
    $esc = function ($text) {
        return htmlspecialchars($text, ENT_QUOTES, 'UTF-8', false);
    };

    $title = $read("div[class='post-title']", 'plaintext');
    $image = $read("div[class='post-featured-image'] img", 'src');
    $author = $read("div[class='post-author']", 'plaintext');
    $date = $read("div[class='post-published-time']", 'plaintext');

    // The original selector ended in a quoted 'p', which is not valid selector
    // syntax. The body container's text is read instead; presumably this is
    // what the share-widget cleanup below was written against.
    $content = $read("div[class='post-body-left col-xs-12 col-sm-9']", 'plaintext');

    // Text of the sharing buttons that ends up inside the article body text.
    $shareText = ' Click to share on Facebook (Opens in new window) Click to share on Twitter (Opens in new window) Click to print (Opens in new window) Click to email this to a friend (Opens in new window) ';

    echo 'Title: ' . $esc($title) . ' <br>';
    echo 'Image: ' . $esc($image) . ' <br>';
    echo 'Author: ' . $esc($author) . ' <br>';
    echo 'Published Date: ' . $esc($date) . ' <br>';
    echo 'Content: ' . $esc(str_replace($shareText, '', $content));
}
/**
 * Scrapes an mvsports.mv article with Simple HTML DOM and prints its details.
 *
 * The article URL is read from $_GET['article']. The title, image URL, author,
 * publication date and the article text are echoed.
 * An error message is echoed instead when the URL is missing or is not an
 * absolute http(s) URL, or when the page cannot be loaded. Elements that are
 * missing from the page are printed as empty values.
 *
 * @return void
 */
function MVSports()
{
    $article = (isset($_GET['article']) && is_string($_GET['article'])) ? trim($_GET['article']) : '';

    // The URL is caller-controlled: only absolute http(s) URLs are handed to
    // the loader, so local paths and other stream wrappers are rejected.
    $parts = parse_url($article);
    $scheme = (is_array($parts) && isset($parts['scheme'])) ? strtolower($parts['scheme']) : '';
    $host = (is_array($parts) && isset($parts['host'])) ? $parts['host'] : '';
    if (($scheme !== 'http' && $scheme !== 'https') || $host === '') {
        echo 'Error: a valid http(s) article URL is required.';
        return;
    }

    $html = file_get_html($article);
    if (!is_object($html)) {
        echo 'Error: the article could not be loaded.';
        return;
    }

    // Reads one property of the first element matching $selector ('' when nothing matches).
    $read = function ($selector, $property) use ($html) {
        $node = $html->find($selector, 0);
        return is_object($node) ? (string) $node->$property : '';
    };
    // Escapes scraped text for HTML output without double-encoding existing entities.
    $esc = function ($text) {
        return htmlspecialchars($text, ENT_QUOTES, 'UTF-8', false);
    };

    $title = $read("header[class='td-post-title'] h1", 'plaintext');
    $image = $read("div[class='td-post-featured-image'] img", 'src');
    $author = $read("div[class='td-post-author-name'] a", 'plaintext');
    $date = $read("div[class='td-post-date'] time", 'plaintext');

    // The original selector lacked the closing quote on the class value and
    // ended in a quoted 'p' (not valid selector syntax). The text of the body
    // container is read instead.
    $content = $read("div[class='td-post-content']", 'plaintext');

    echo 'Title: ' . $esc($title) . ' <br>';
    echo 'Image: ' . $esc($image) . ' <br>';
    echo 'Author: ' . $esc($author) . ' <br>';
    echo 'Published Date: ' . $esc($date) . ' <br>';
    echo 'Content: ' . $esc($content);
}
/**
 * Scrapes an adduonline.com article with Simple HTML DOM and prints its title.
 *
 * The article URL is read from $_GET['article']. Only the title is extracted;
 * image, author, date and content are not implemented for this site yet.
 * An error message is echoed instead when the URL is missing or is not an
 * absolute http(s) URL, or when the page cannot be loaded. A missing title
 * element is printed as an empty value.
 *
 * @return void
 */
function AdduOnline()
{
    $article = (isset($_GET['article']) && is_string($_GET['article'])) ? trim($_GET['article']) : '';

    // The URL is caller-controlled: only absolute http(s) URLs are handed to
    // the loader, so local paths and other stream wrappers are rejected.
    $parts = parse_url($article);
    $scheme = (is_array($parts) && isset($parts['scheme'])) ? strtolower($parts['scheme']) : '';
    $host = (is_array($parts) && isset($parts['host'])) ? $parts['host'] : '';
    if (($scheme !== 'http' && $scheme !== 'https') || $host === '') {
        echo 'Error: a valid http(s) article URL is required.';
        return;
    }

    $html = file_get_html($article);
    if (!is_object($html)) {
        echo 'Error: the article could not be loaded.';
        return;
    }

    // The original statement was missing its terminating semicolon (a parse
    // error for the whole file) and never used the extracted title.
    $node = $html->find("div[class='post-title'] h2", 0);
    $title = is_object($node) ? (string) $node->plaintext : '';

    // Escape scraped text for HTML output without double-encoding existing entities.
    echo 'Title: ' . htmlspecialchars($title, ENT_QUOTES, 'UTF-8', false) . ' <br>';
}
/**
 * Prints the error shown when the requested URL belongs to no supported site.
 *
 * Emits the rejection message followed by the list of site names that can be
 * scraped. No scraping is performed.
 *
 * @return void
 */
function InvalidUrl()
{
    $lines = array(
        "The url specified is not acceptable in our scraper. Please use a correct url. Accepted urls are listed in below <br>",
        "Sites scrapable are: Avas, Mihaaru, Vaguthu, Sun, MVSports, AdduLive, Dhuvas",
    );

    foreach ($lines as $line) {
        print $line;
    }
}
- ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement