Advertisement
Guest User

Untitled

a guest
Apr 20th, 2014
42
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.44 KB | None | 0 0
  /**
   * Scrapes a product listing page and echoes the products found as JSON.
   *
   * Fetches $list_url, parses it as HTML and runs four XPath queries (product
   * links, photos, "was" prices, "now" prices). The n-th match of each query is
   * treated as belonging to the n-th product. One entry is emitted per product
   * link; a field whose query produced fewer matches than there are product
   * links is emitted as null instead of triggering undefined-index errors.
   *
   * Output (all via echo, nothing is returned):
   *  - "this is empty" when the page could not be fetched or has no content;
   *  - otherwise an optional "... location is wrong<br>" line for every query
   *    that matched nothing, followed by a JSON array of objects with the keys
   *    product_url, shop_name, photo_url, was_price and now_price.
   *
   * Side effect: libxml internal error handling is switched on and left on.
   *
   * @param string $list_url           URL (or path) of the listing page.
   * @param string $shop_name          Copied verbatim into every result entry.
   * @param string $photo_location     XPath selecting the product image elements.
   * @param string $photo_url_root     Prefix prepended to every image "src".
   * @param string $product_location   XPath selecting the product link elements.
   * @param string $product_url_root   Prefix prepended to every link "href".
   * @param string $was_price_location XPath selecting the original-price nodes.
   * @param string $now_price_location XPath selecting the sale-price nodes.
   * @param string $gender             Currently unused; kept for interface compatibility.
   * @param string $country            Currently unused; kept for interface compatibility.
   * @param mysqli $con                Currently unused; kept for interface compatibility.
   *
   * @return void
   */
  function scrape($list_url, $shop_name, $photo_location, $photo_url_root, $product_location, $product_url_root, $was_price_location, $now_price_location, $gender, $country, mysqli $con)
  {
      $html = file_get_contents($list_url);

      // Real-world markup is rarely valid; collect parser complaints internally
      // instead of emitting a PHP warning for each of them.
      libxml_use_internal_errors(TRUE);

      if (empty($html)) {
          echo "this is empty";
          return;
      }

      $doc = new DOMDocument();
      $doc->loadHTML($html);
      libxml_clear_errors(); // discard the errors gathered while parsing
      $xpath = new DOMXPath($doc);

      // Each helper always returns an array (possibly empty), so the merge loop
      // below never touches an undefined variable.
      $product_urls = _scrape_attribute_values($xpath, $product_location, 'href', $product_url_root, "product location is wrong<br>");
      $photo_urls = _scrape_attribute_values($xpath, $photo_location, 'src', $photo_url_root, "photo location is wrong<br>");
      $was_prices = _scrape_price_values($xpath, $was_price_location, "was price location is wrong<br>");
      $now_prices = _scrape_price_values($xpath, $now_price_location, "now price location is wrong<br>");

      // Zip the four lists by position; the product links decide how many
      // entries are produced.
      $result = array();
      foreach ($product_urls as $i => $product_url) {
          $result[] = array(
              'product_url' => $product_url,
              'shop_name' => $shop_name,
              'photo_url' => isset($photo_urls[$i]) ? $photo_urls[$i] : null,
              'was_price' => isset($was_prices[$i]) ? $was_prices[$i] : null,
              'now_price' => isset($now_prices[$i]) ? $now_prices[$i] : null
          );
      }

      echo json_encode($result);
  }

  /**
   * Returns $url_root . <attribute value> for every node matched by $query.
   *
   * Echoes $error_message and returns an empty array when the query matches
   * nothing or the expression is rejected by DOMXPath. Matched nodes that are
   * not elements contribute an empty attribute value so positions stay aligned.
   */
  function _scrape_attribute_values(DOMXPath $xpath, $query, $attribute, $url_root, $error_message)
  {
      $values = array();
      $nodes = $xpath->query($query);

      if ($nodes === false || $nodes->length === 0) {
          echo $error_message;
          return $values;
      }

      foreach ($nodes as $node) {
          $raw = ($node instanceof DOMElement) ? $node->getAttribute($attribute) : '';
          $values[] = $url_root . $raw;
      }

      return $values;
  }

  /**
   * Returns one "&pound;"-prefixed price string for every node matched by $query.
   *
   * Everything except digits, commas and dots is stripped from the node text.
   * Echoes $error_message and returns an empty array when the query matches
   * nothing or the expression is rejected by DOMXPath.
   */
  function _scrape_price_values(DOMXPath $xpath, $query, $error_message)
  {
      $values = array();
      $nodes = $xpath->query($query);

      if ($nodes === false || $nodes->length === 0) {
          echo $error_message;
          return $values;
      }

      foreach ($nodes as $node) {
          $values[] = "&pound;" . preg_replace("/[^0-9,.]/", "", $node->nodeValue);
      }

      return $values;
  }
  90.  
  /*
   * Scrape configuration for a single run: the ASOS women's "70% off" sale
   * listing. All values below are handed straight to scrape().
   */

  // Shop identity and audience.
  $shop_name = "ASOS";
  $gender = "f";
  $country = "UK";

  // Listing page to download.
  $list_url = "http://www.asos.com/Women/Sale/70-Off-Sale/Cat/pgecategory.aspx?cid=16903&pge=0&pgesize=1002&sort=-1";

  // Prefixes prepended to the scraped "href" / "src" attribute values.
  $product_url_root = "http://www.asos.com";
  $photo_url_root = "";

  // XPath selectors, all relative to the <ul id="items"> product list.
  $product_location = "//ul[@id='items']/li/div[@class='categoryImageDiv']/*[1]";
  $photo_location = "//ul[@id='items']/li/div[@class='categoryImageDiv']/*[1]/img";

  // Original author's note: leave the 'recRP rrp' alternative in place.
  $was_price_location = "//ul[@id='items']/li/div[@class='productprice']/span[@class='price' or @class='recRP rrp']";

  // Original author's note: leave the 'outlet-current-price' alternative in place.
  // NOTE(review): the "was" selector targets class 'price' while this "now"
  // selector targets 'prevPrice previousprice' - confirm the two are not swapped.
  $now_price_location = "//ul[@id='items']/li/div[@class='productprice']/span[@class='prevPrice previousprice' or @class='price outlet-current-price']";

  // NOTE(review): $con is not assigned anywhere in the visible code; presumably
  // a mysqli connection opened elsewhere - confirm before running.
  scrape(
      $list_url,
      $shop_name,
      $photo_location,
      $photo_url_root,
      $product_location,
      $product_url_root,
      $was_price_location,
      $now_price_location,
      $gender,
      $country,
      $con
  );
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement