Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<?php
// Both files are hard requirements: the first provides file_get_html(), and the
// second presumably defines the $conn mysqli handle used further down
// (NOTE(review): confirm against snippets/db-connection.php). `require` stops
// the script immediately if either is missing, instead of continuing with
// undefined functions/variables as `include` would.
require 'libraries/simple_html_dom.php';
require 'snippets/db-connection.php';

// Index page listing every unique item; its links are followed one by one below.
$html = file_get_html('http://73.202.253.245/uniqueitempages/Alluniqueitems.html');
if (!$html) {
    // file_get_html() returns false when the page could not be loaded; bail out
    // here rather than hitting a fatal error on $html->find() later.
    echo 'Scraping error: could not load the unique item index page.';
    exit;
}
/**
 * Wall-clock stopwatch for the whole import job.
 *
 * Creating an instance prints a "working" notice and records the start time;
 * destroying it (explicit unset() or script shutdown) prints the elapsed time
 * in whole seconds.
 */
class Timer
{
    /** @var int|null Unix timestamp (seconds) captured when the timer started. */
    public $time = null;

    /**
     * Records the start time and tells the user the job is running.
     */
    public function __construct()
    {
        $this->time = time();
        // Was '</br>', which is not a valid tag; use the same '<br/>' as the destructor.
        echo 'Working - please wait...<br/>';
    }

    /**
     * Prints how many seconds passed since construction.
     */
    public function __destruct()
    {
        echo '<br/>Job finished in ' . (time() - $this->time) . ' seconds.';
    }
}
// Start the stopwatch; it reports the elapsed time when it is destroyed.
$t = new Timer();

// Create the results table if it is not there yet. Each item row holds the
// title plus up to four (league, currency, price) triples.
// IF NOT EXISTS keeps every run after the first from reporting a spurious
// "table already exists" error. Note that nothing here empties the table, so
// re-running the script appends to the existing rows.
$sql = "CREATE TABLE IF NOT EXISTS table_item_info (
    id INT(6) UNSIGNED AUTO_INCREMENT PRIMARY KEY,
    title VARCHAR(256) NOT NULL,
    league_one VARCHAR(256),
    currency_one VARCHAR(256),
    price_one DECIMAL(11,3),
    league_two VARCHAR(256),
    currency_two VARCHAR(256),
    price_two DECIMAL(11,3),
    league_three VARCHAR(256),
    currency_three VARCHAR(256),
    price_three DECIMAL(11,3),
    league_four VARCHAR(256),
    currency_four VARCHAR(256),
    price_four DECIMAL(11,3)
)";
if (!mysqli_query($conn, $sql)) {
    echo "Error creating table: " . mysqli_error($conn);
}
// ---------------------------------------------------------------------------
// Scrape every item page linked from the index and store one row per item.
// ---------------------------------------------------------------------------

// Prepared once and re-executed per item. Binding the values (instead of
// interpolating scraped text into the SQL string) closes the injection hole the
// league/price/currency columns had and makes manual escaping unnecessary.
$insertSql = 'INSERT INTO table_item_info'
    . ' (title, league_one, price_one, currency_one,'
    . ' league_two, price_two, currency_two,'
    . ' league_three, price_three, currency_three,'
    . ' league_four, price_four, currency_four)'
    . ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)';
$insertStmt = mysqli_prepare($conn, $insertSql);
if ($insertStmt === false) {
    echo 'Error preparing insert: ' . mysqli_error($conn);
    exit;
}

// Returns the trimmed text of every node in a list. textContent is already
// decoded UTF-8 text without markup, so no saveHTML()/strip_tags()/entity
// round-trip is needed, and trim() drops the newline that round-trip used to
// leave on every value.
$nodeTexts = static function (DOMNodeList $nodes) {
    $texts = array();
    foreach ($nodes as $node) {
        $texts[] = trim($node->textContent);
    }
    return $texts;
};

// Returns $values[$index], or null when that slot is missing or blank, so the
// column is stored as NULL instead of an empty string (which a DECIMAL column
// rejects or silently turns into 0).
$valueAt = static function (array $values, $index) {
    return (isset($values[$index]) && $values[$index] !== '') ? $values[$index] : null;
};

foreach ($html->find('a[href^=/uniqueitempages/]') as $uniqueItems) {
    $url = trim('http://73.202.253.245' . $uniqueItems->href);
    $safeUrl = htmlspecialchars($url, ENT_QUOTES, 'UTF-8');

    // cURL is used for the item pages because file_get_html only worked
    // intermittently. The target is plain HTTP, so the former
    // CURLOPT_SSL_VERIFY* = 0 options had no effect and are not set; leaving
    // them out keeps certificate checks on should the URL ever become HTTPS.
    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 0);
    curl_setopt($curl, CURLOPT_TIMEOUT, 1200); // Upper bound for one page download, in seconds.
    $page = curl_exec($curl);
    if ($page === false) {
        // A transport failure aborts the whole job, as before.
        echo 'Scraping error - you suck: ' . curl_error($curl);
        curl_close($curl);
        exit;
    }
    curl_close($curl);

    if (trim($page) === '') {
        // DOMDocument::loadHTML() refuses empty input, so skip such pages.
        echo 'Skipping ' . $safeUrl . ': empty response.<br/>';
        continue;
    }

    // Parse the page. The markup is converted from UTF-8 to ISO-8859-1 first,
    // exactly what the previous utf8_decode() call did (that function is
    // deprecated as of PHP 8.2); libxml then reads the bytes as Latin-1.
    // NOTE(review): this assumes the pages are UTF-8 without a charset
    // declaration — confirm against the live site.
    // Parser warnings about sloppy HTML are collected and discarded instead of
    // being silenced with '@'.
    $previousLibxmlSetting = libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $dom->loadHTML(mb_convert_encoding($page, 'ISO-8859-1', 'UTF-8'));
    libxml_clear_errors();
    libxml_use_internal_errors($previousLibxmlSetting);

    $xpath = new DOMXPath($dom);
    $titles     = $nodeTexts($xpath->query('(//div[@id="wrapper"]//p)[@class="header"][1]')); // Item name.
    $prices     = $nodeTexts($xpath->query('//tr[@class="price_tr"]/td[2]'));                  // Item prices.
    $currencies = $nodeTexts($xpath->query('//tr[@class="price_tr"]/td[3]'));                  // Currency types (exalted or chaos).
    $leagues    = $nodeTexts($xpath->query('//td[@class="left-column"]/p[1]'));                // League names.

    $title = $valueAt($titles, 0);
    if ($title === null) {
        // Previously a page without a header silently reused the title of the
        // item scraped before it; skip the page instead of storing a wrong row.
        echo 'Skipping ' . $safeUrl . ': no item title found.<br/>';
        continue;
    }

    // Column order must match $insertSql: title, then four
    // (league, price, currency) triples.
    // The league index is offset by one relative to price/currency, as in the
    // original query (league[1..4] with price[0..3]).
    // NOTE(review): presumably the first left-column cell is not a league row —
    // confirm against the page markup.
    $row = array($title);
    for ($slot = 0; $slot < 4; $slot++) {
        $row[] = $valueAt($leagues, $slot + 1);
        $row[] = $valueAt($prices, $slot);
        $row[] = $valueAt($currencies, $slot);
    }

    mysqli_stmt_bind_param($insertStmt, str_repeat('s', count($row)), ...$row);
    if (!mysqli_stmt_execute($insertStmt)) {
        echo 'Error inserting row: ' . mysqli_stmt_error($insertStmt);
    }
}

mysqli_stmt_close($insertStmt);
mysqli_close($conn);
unset($t); // Destroys the timer, which reports how long the job took.
?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement