Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/**
 * Scrape a product listing page and echo the extracted products as JSON.
 *
 * The page at $list_url is downloaded, parsed as HTML and queried with four
 * XPath expressions (product link, photo, "was" price, "now" price). The
 * n-th match of each expression is combined into the n-th result row, so the
 * expressions are expected to yield their matches in the same order. When an
 * expression yields fewer matches than there are product links, the missing
 * fields are emitted as null instead of raising "undefined index" warnings.
 *
 * Everything is echoed; nothing is returned:
 *  - "this is empty" when the page could not be fetched or has no content;
 *  - otherwise one "<name> location is wrong<br>" line per expression that
 *    matched nothing, followed by a JSON array of rows with the keys
 *    product_url, shop_name, photo_url, was_price and now_price.
 *
 * @param string $list_url           URL (or any stream path) of the listing page.
 * @param string $shop_name          Copied verbatim into every result row.
 * @param string $photo_location     XPath selecting elements with a "src" attribute.
 * @param string $photo_url_root     Prefix prepended to every "src" value.
 * @param string $product_location   XPath selecting elements with an "href" attribute.
 * @param string $product_url_root   Prefix prepended to every "href" value.
 * @param string $was_price_location XPath selecting the nodes holding the original price.
 * @param string $now_price_location XPath selecting the nodes holding the sale price.
 * @param string $gender             Not used by this function.
 * @param string $country            Not used by this function; prices are always prefixed with "£".
 * @param mysqli $con                Not used by this function.
 *
 * @return void
 */
function scrape($list_url, $shop_name, $photo_location, $photo_url_root, $product_location, $product_url_root, $was_price_location, $now_price_location, $gender, $country, mysqli $con)
{
    $html = file_get_contents($list_url);

    // Collect libxml parse errors internally instead of emitting a PHP
    // warning for every piece of malformed markup on the page.
    libxml_use_internal_errors(true);

    // Covers both a failed download (false) and an empty body ("").
    if (empty($html)) {
        echo "this is empty";
        return;
    }

    $doc = new DOMDocument();
    $doc->loadHTML($html);
    libxml_clear_errors(); // discard the errors collected for sloppy HTML

    $xpath = new DOMXPath($doc);

    // Runs one XPath query and returns its matches as a plain list. Reports
    // "<label> location is wrong<br>" when nothing matched; a malformed
    // expression (query() returns false) is treated the same way.
    $findNodes = function ($expression, $label) use ($xpath) {
        $nodes = $xpath->query($expression);
        if ($nodes === false || $nodes->length === 0) {
            echo $label . " location is wrong<br>";
            return array();
        }

        $list = array();
        foreach ($nodes as $node) {
            $list[] = $node;
        }
        return $list;
    };

    // Keeps only digits, commas and dots of the node text and prefixes "£".
    $toPrice = function ($node) {
        return "£" . preg_replace("/[^0-9,.]/", "", $node->nodeValue);
    };

    // Every list starts out empty so a query without matches cannot leave
    // an undefined variable behind.
    $product_urls = array();
    foreach ($findNodes($product_location, "product") as $link) {
        $product_urls[] = $product_url_root . $link->getAttribute('href');
    }

    $photo_urls = array();
    foreach ($findNodes($photo_location, "photo") as $img) {
        $photo_urls[] = $photo_url_root . $img->getAttribute('src');
    }

    $was_prices = array_map($toPrice, $findNodes($was_price_location, "was price"));
    $now_prices = array_map($toPrice, $findNodes($now_price_location, "now price"));

    // Combine the parallel lists by position; the product links drive the
    // row count and any field missing at that position becomes null.
    $result = array();
    foreach ($product_urls as $i => $product_url) {
        $result[] = array(
            'product_url' => $product_url,
            'shop_name'   => $shop_name,
            'photo_url'   => isset($photo_urls[$i]) ? $photo_urls[$i] : null,
            'was_price'   => isset($was_prices[$i]) ? $was_prices[$i] : null,
            'now_price'   => isset($now_prices[$i]) ? $now_prices[$i] : null,
        );
    }

    echo json_encode($result);
}
/*
 * Configuration for one scrape() run against an ASOS listing page, followed
 * by the call itself.
 */

// Shop label and audience tags handed to scrape().
$shop_name = "ASOS";
$gender = "f";
$country = "UK";

// Listing page to download (women's sale category, 1002 items per page).
$list_url = "http://www.asos.com/Women/Sale/70-Off-Sale/Cat/pgecategory.aspx?cid=16903&pge=0&pgesize=1002&sort=-1";

// Prefixes prepended to the scraped href / src attribute values.
$product_url_root = "http://www.asos.com";
$photo_url_root = "";

// XPath expressions: the product link is the first child element of each
// item's image div, and the photo is the <img> inside that link.
$product_location = "//ul[@id='items']/li/div[@class='categoryImageDiv']/*[1]";
$photo_location = "//ul[@id='items']/li/div[@class='categoryImageDiv']/*[1]/img";

// XPath expressions for the two price spans inside each item's price div.
$was_price_location = "//ul[@id='items']/li/div[@class='productprice']/span[@class='price' or @class='recRP rrp']"; // leave recRP rrp
$now_price_location = "//ul[@id='items']/li/div[@class='productprice']/span[@class='prevPrice previousprice' or @class='price outlet-current-price']"; // leave outlet-current-price

// NOTE(review): $con is not assigned anywhere in this section; it is
// presumably a mysqli connection created elsewhere - confirm.
scrape(
    $list_url,
    $shop_name,
    $photo_location,
    $photo_url_root,
    $product_location,
    $product_url_root,
    $was_price_location,
    $now_price_location,
    $gender,
    $country,
    $con
);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement