Advertisement
Guest User

Untitled

a guest
May 6th, 2016
182
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.39 KB | None | 0 0
  1. <?php
  2. include('libraries/simple_html_dom.php');
  3. include('snippets/db-connection.php');
  // Load the index page that links to every unique item page.
  $html = file_get_html('http://73.202.253.245/uniqueitempages/Alluniqueitems.html');
  if (!$html) {
      // simple_html_dom's file_get_html() yields false when the page could not be
      // loaded; stop here rather than failing later on $html->find().
      exit('Unable to load the unique item index page.');
  }
  5.  
  /**
   * Tracks how long the database update takes.
   *
   * Creating an instance records the current time and prints a progress notice;
   * destroying it prints the number of whole seconds that elapsed in between.
   */
  class Timer {
      /** @var int|null Unix timestamp (in seconds) captured by the constructor. */
      public $time = null;

      /**
       * Records the start time and tells the user that work has begun.
       */
      public function __construct() {
          $this->time = time();
          // Use a well-formed "<br/>" so the line break matches the one emitted by
          // the destructor.
          echo 'Working - please wait...<br/>';
      }

      /**
       * Prints the elapsed time in seconds since construction.
       */
      public function __destruct() {
          echo '<br/>Job finished in ' . (time() - $this->time) . ' seconds.';
      }
  }
  $t = new Timer(); // Starts timing; the elapsed time is printed when $t is destroyed.

  // Create the destination table: one row per item, with up to four
  // league/currency/price triples. IF NOT EXISTS keeps a repeat run from
  // reporting a "table already exists" error.
  $sql = "CREATE TABLE IF NOT EXISTS table_item_info (
      id INT(6) UNSIGNED AUTO_INCREMENT PRIMARY KEY,
      title VARCHAR(256) NOT NULL,
      league_one VARCHAR(256),
      currency_one VARCHAR(256),
      price_one DECIMAL(11,3),
      league_two VARCHAR(256),
      currency_two VARCHAR(256),
      price_two DECIMAL(11,3),
      league_three VARCHAR(256),
      currency_three VARCHAR(256),
      price_three DECIMAL(11,3),
      league_four VARCHAR(256),
      currency_four VARCHAR(256),
      price_four DECIMAL(11,3)
  )";

  if (!mysqli_query($conn, $sql)) {
      echo "Error creating table: " . mysqli_error($conn);
  }
  40.  
  // Returns the markup of a single DOM node with its tags stripped. The text is
  // returned untrimmed so each caller decides how to normalise it.
  $nodeText = function ($node) {
      $fragment = new DOMDocument();
      $fragment->appendChild($fragment->importNode($node, true));
      return strip_tags($fragment->saveHTML());
  };

  // Returns $values[$index], or null when that position was not scraped or is
  // empty, so the nullable columns receive NULL instead of an empty string.
  $valueAt = function ($values, $index) {
      return (isset($values[$index]) && $values[$index] !== '') ? $values[$index] : null;
  };

  // Prepare the INSERT once and reuse it for every item. Bound parameters keep
  // scraped text out of the SQL string, so no manual escaping is required.
  $insert = mysqli_prepare(
      $conn,
      "INSERT INTO table_item_info (title, league_one, price_one, currency_one, league_two, price_two, currency_two, league_three, price_three, currency_three, league_four, price_four, currency_four) "
      . "VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
  );
  if (!$insert) {
      echo "Error preparing insert: " . mysqli_error($conn);
      exit;
  }
  // Parameters are bound by reference: assigning these variables inside the loop
  // is what changes the values sent on each execute.
  mysqli_stmt_bind_param(
      $insert,
      'sssssssssssss',
      $title,
      $leagueOne, $priceOne, $currencyOne,
      $leagueTwo, $priceTwo, $currencyTwo,
      $leagueThree, $priceThree, $currencyThree,
      $leagueFour, $priceFour, $currencyFour
  );

  // Visit every unique item page linked from the index page and store one row per item.
  foreach ($html->find('a[href^=/uniqueitempages/]') as $uniqueItems) {
      // Start each item from a clean slate so a page without a header cannot
      // inherit the previous item's title.
      $item = array(
          'title'    => '',
          'price'    => array(),
          'league'   => array(),
          'currency' => array(),
      );
      $url = trim('http://73.202.253.245' . $uniqueItems->href);

      // Fetch the item page with cURL. TLS verification is left at cURL's defaults.
      $curl = curl_init($url);
      curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
      curl_setopt($curl, CURLOPT_CONNECTTIMEOUT, 0);
      curl_setopt($curl, CURLOPT_TIMEOUT, 1200); // Maximum time cURL may spend on one page, in seconds.
      $page = curl_exec($curl);
      if (curl_errno($curl)) {
          echo 'Scraping error - you suck: ' . curl_error($curl);
          exit;
      }
      curl_close($curl);

      // Parse the page. The leading @ silences the warnings libxml raises for
      // malformed markup.
      // NOTE(review): the utf8_decode() step assumes the page is served as UTF-8 - confirm.
      $dom = new DOMDocument;
      $dom->encoding = 'utf-8'; // Intended to let special characters such as "ö" come out as HTML entities.
      @$dom->loadHTML(utf8_decode($page));
      $xpath = new DOMXpath($dom);

      // XPath queries for the fields we store.
      $header = $xpath->query('(//div[@id="wrapper"]//p)[@class="header"][1]'); // Item name.
      $price = $xpath->query('//tr[@class="price_tr"]/td[2]'); // Item price.
      $currency = $xpath->query('//tr[@class="price_tr"]/td[3]'); // Currency type (exalted or chaos).
      $league = $xpath->query('//td[@class="left-column"]/p[1]'); // Item league.

      // Item name: turn HTML entities back into real characters, then trim.
      foreach ($header as $node) {
          $text = mb_convert_encoding($nodeText($node), 'html-entities', 'utf-8');
          $item['title'] = trim(html_entity_decode($text));
      }

      // Prices, currencies and leagues are collected in document order. Trimming
      // removes the whitespace that surrounds the serialised node text.
      foreach ($price as $node) {
          $item['price'][] = trim($nodeText($node));
      }
      foreach ($currency as $node) {
          $item['currency'][] = trim($nodeText($node));
      }
      foreach ($league as $node) {
          $item['league'][] = trim($nodeText($node));
      }

      // Map the scraped lists onto the bound parameters. League positions start at
      // 1 while prices and currencies start at 0; presumably the first matched
      // league paragraph does not belong to a price row - TODO confirm against the page markup.
      $title = $item['title'];
      $leagueOne = $valueAt($item['league'], 1);
      $priceOne = $valueAt($item['price'], 0);
      $currencyOne = $valueAt($item['currency'], 0);
      $leagueTwo = $valueAt($item['league'], 2);
      $priceTwo = $valueAt($item['price'], 1);
      $currencyTwo = $valueAt($item['currency'], 1);
      $leagueThree = $valueAt($item['league'], 3);
      $priceThree = $valueAt($item['price'], 2);
      $currencyThree = $valueAt($item['currency'], 2);
      $leagueFour = $valueAt($item['league'], 4);
      $priceFour = $valueAt($item['price'], 3);
      $currencyFour = $valueAt($item['currency'], 3);

      if (!mysqli_stmt_execute($insert)) {
          echo "Error inserting row: " . mysqli_stmt_error($insert);
      }
  }
  mysqli_stmt_close($insert);
  mysqli_close($conn);

  unset($t); // Destroying the timer prints how long the job took.
  127.  
  128.  
  129. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement