# Advertisement
# Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
---
# Scraper configuration that collects product data from https://www.ae.com.
#
# Pipeline, as expressed by the commands below:
#   1. Open the home page and queue navigation links into the `catalog` pool.
#   2. Visit every `catalog` link and queue product-page links into `pages`.
#   3. Visit every `pages` link, build a `product` object and save it.
#
# NOTE(review): this file was recovered from a paste that had lost all
# indentation; the nesting below is reconstructed from the command
# semantics. In particular, the three `walk` stages are assumed to be
# siblings under the top-level `do` -- confirm against the original.
# NOTE(review): the command vocabulary looks like Diggernaut's
# meta-language -- confirm the target engine before relying on the
# per-command remarks below.
config:
  debug: 2

do:
  # Start from an empty `pages` pool.
  - pool_clear: pages

  # Stage 1: home page -> `catalog` pool.
  - walk:
      to: https://www.ae.com
      do:
        - find:
            path: a.third-level,a.second-level-item
            do:
              - parse:
                  attr: href
              - normalize:
                  routine: url
              - link_add:
                  pool: catalog

  # Stage 2: catalog listing pages -> `pages` pool.
  - walk:
      to: links
      pool: catalog
      do:
        - find:
            path: 'div.product-tile>div.product-details-container>a[data-qa-link-to="pdp"]'
            do:
              - parse:
                  attr: href
                  # Keep only the part of the href before the first `?`.
                  filter: '^([^\?]+)'
              - normalize:
                  routine: url
              - link_add:
                  pool: pages

  # Stage 3: product pages -> one saved `product` object each.
  - walk:
      to: links
      pool: pages
      do:
        - sleep: 3
        - find:
            path: 'div.container-fluid[itemtype="http://schema.org/Product"]'
            do:
              # NOTE(review): `pid` is cleared here but never set or read
              # in this file -- possibly a leftover.
              - variable_clear: pid
              - object_new: product

              # date: ISO-8601 timestamp produced by the JS snippet.
              - eval:
                  routine: js
                  body: '(function (){var d = new Date(); return d.toISOString()})();'
              - object_field_set:
                  object: product
                  field: date

              # url: value returned by `static_get: url`
              # (presumably the current page URL -- confirm).
              - static_get: url
              - object_field_set:
                  object: product
                  field: url

              # name
              - find:
                  path: h1.psp-product-name
                  do:
                    - parse
                    - space_dedupe
                    - trim
                    - object_field_set:
                        object: product
                        field: name

              # sku
              - find:
                  path: span.pdp-about-cs
                  do:
                    - parse
                    - space_dedupe
                    - trim
                    - object_field_set:
                        object: product
                        field: sku

              # Embedded `$AWP_ENV` script: brand, colour variations and
              # image URLs are read from its data rather than the markup.
              - find:
                  in: doc
                  path: 'script:contains("var $AWP_ENV")'
                  do:
                    - parse:
                        # Capture the expression assigned to `$AWP_ENV`.
                        filter: 'var\s+\$AWP_ENV\s+\=\s+(.+)\s*$'
                    # Presumably converts the captured JSON to XML so the
                    # `find` commands below can query it -- confirm.
                    - normalize:
                        routine: json2xml
                    - to_block
                    - find:
                        path: body_safe>viewdata>availableproducts
                        do:
                          # brand: first `brandname` node only.
                          - find:
                              path: brandname
                              slice: 0
                              do:
                                - parse
                                - space_dedupe
                                - trim
                                - object_field_set:
                                    object: product
                                    field: brand

                          # variations: non-empty colour names, `|`-joined.
                          - find:
                              path: colorname
                              do:
                                - parse
                                - space_dedupe
                                - trim
                                - if:
                                    match: '\w+'
                                    do:
                                      - object_field_set:
                                          object: product
                                          joinby: "|"
                                          field: variations

                          # images: each non-empty image URL is normalized,
                          # suffixed with `?scl=1`, then `|`-joined.
                          - find:
                              path: largeimages
                              do:
                                - parse
                                - space_dedupe
                                - trim
                                - if:
                                    match: '\w+'
                                    do:
                                      - normalize:
                                          routine: url
                                      - variable_set: iurl
                                      - register_set: "<%iurl%>?scl=1"
                                      - object_field_set:
                                          object: product
                                          joinby: "|"
                                          field: images

              # NOTE(review): the finds from here on are assumed to run
              # against the product container, not the JSON block above --
              # the flattened paste does not show their level; confirm.

              # currency / price from the regular-price markup.
              - find:
                  path: 'span.psp-product-regularprice[itemprop="priceCurrency"]'
                  do:
                    - parse:
                        attr: content
                    - object_field_set:
                        object: product
                        field: currency
              - find:
                  path: 'span.psp-product-regularprice[itemprop="price"]'
                  do:
                    - parse:
                        attr: content
                    - object_field_set:
                        object: product
                        type: float
                        field: price

              # currency / price from the sale-price markup. These write
              # the same fields as the regular-price finds above, so when
              # both exist the sale values presumably win -- confirm.
              - find:
                  path: 'span.psp-product-saleprice[itemprop="priceCurrency"],span.psp-product-sale-currency[itemprop="priceCurrency"]'
                  do:
                    - parse:
                        attr: content
                    - object_field_set:
                        object: product
                        field: currency
              - find:
                  path: 'span.psp-product-saleprice[itemprop="price"]'
                  do:
                    - parse:
                        attr: content
                    - object_field_set:
                        object: product
                        type: float
                        field: price

              # description: first matching details block in the document.
              - find:
                  in: doc
                  path: div.pdp-about-details-equit
                  slice: 0
                  do:
                    - parse
                    - space_dedupe
                    - trim
                    - object_field_set:
                        object: product
                        field: description

              # category: breadcrumb items selected by slice `0:-2`,
              # keeping only texts with at least two word characters,
              # `|`-joined.
              - find:
                  in: doc
                  path: ol.breadcrumb>li.bc-item
                  slice: '0:-2'
                  do:
                    # NOTE(review): `list_item` is cleared here but never
                    # set or read in this file -- possibly a leftover.
                    - variable_clear: list_item
                    - parse
                    - space_dedupe
                    - trim
                    - if:
                        match: '\w{2,}'
                        do:
                          - object_field_set:
                              object: product
                              joinby: "|"
                              field: category

              # Emit the assembled product record.
              - object_save:
                  name: product
# Advertisement
# Add Comment
# Please, Sign In to add comment
# Advertisement