# Advertisement
# Not a member of Pastebin yet?
# Sign Up,
# it unlocks many cool features!
---
# Scraper configuration for www.betseyjohnson.com. The command names look like
# the Diggernaut meta-language -- TODO confirm the target engine.
#
# NOTE(review): the source was a flattened paste: all indentation was lost and
# every line carried a stray "- " bullet. The nesting below is reconstructed
# from command order; every key and value is unchanged. Confirm the nesting
# against the original config, especially at the spots marked NOTE(review).
config:
  debug: 2
  agent: Firefox
do:
  # Stage 1: walk the home page and add each `.off > a` href to pool "main".
  - walk:
      to: http://www.betseyjohnson.com/
      do:
        - find:
            path: .off > a
            do:
              - parse:
                  attr: href
              # Replace matches of '\s*#' with an empty string.
              # NOTE(review): `args` is a mapping here but a list of mappings
              # in the later replace_substring call -- confirm which form the
              # engine expects.
              - normalize:
                  routine: replace_substring
                  args:
                    '\s*#': ''
              - normalize:
                  routine: url
              - link_add:
                  pool: main
  # Stage 2: walk every link in pool "main", follow each `.categoryNav a`
  # href, and add `.mainImage` hrefs to pool "sub".
  # NOTE(review): assumed to be a top-level sibling of the first walk.
  - walk:
      to: links
      pool: main
      do:
        - find:
            path: .categoryNav a
            do:
              - parse:
                  attr: href
              - walk:
                  to: value
                  do:
                    # When a `.viewAll` element exists, walk its data-href and
                    # collect `.mainImage` hrefs from that page.
                    - find:
                        path: .viewAll
                        do:
                          - parse:
                              attr: data-href
                          - normalize:
                              routine: url
                          - walk:
                              to: value
                              do:
                                - find:
                                    path: .mainImage
                                    do:
                                      # Presumably the first matching filter
                                      # wins: href up to "?", else whole href.
                                      - parse:
                                          attr: href
                                          filter:
                                            - (.+)\?
                                            - (.+)
                                      - normalize:
                                          routine: url
                                      - link_add:
                                          pool: sub
                    # Same collection applied to the walked page itself.
                    # NOTE(review): assumed to be a sibling of the `.viewAll`
                    # find rather than nested inside it -- confirm.
                    - find:
                        path: .mainImage
                        do:
                          - parse:
                              attr: href
                              filter:
                                - (.+)\?
                                - (.+)
                          - normalize:
                              routine: url
                          - link_add:
                              pool: sub
  # Stage 3: walk every link in pool "sub" and build one "product" object.
  # NOTE(review): assumed to be a top-level sibling of the previous walks.
  - walk:
      to: links
      pool: sub
      do:
        - object_new: product
        # date: result of the JS snippet (Date#toISOString); url: static `url`.
        - find:
            in: doc
            path: head
            do:
              - eval:
                  routine: js
                  body: '(function (){var d = new Date(); return d.toISOString()})();'
              - object_field_set:
                  object: product
                  field: date
              - static_get: url
              - object_field_set:
                  object: product
                  field: url
        # sku: content attribute of meta[itemprop="productID"].
        - find:
            path: 'meta[itemprop="productID"]'
            do:
              - parse:
                  attr: content
              - space_dedupe
              - trim
              - object_field_set:
                  object: product
                  field: sku
        # category: `.breadcrumb a` texts, lower-cased, joined with "|".
        - find:
            path: .breadcrumb a
            do:
              - parse
              - space_dedupe
              - trim
              - normalize:
                  routine: lower
              - object_field_set:
                  object: product
                  field: category
                  joinby: "|"
        # variations: values of `select.COLOR_NAME > option` that contain a
        # non-whitespace character, joined with "|".
        - find:
            path: 'select.COLOR_NAME > option'
            do:
              - parse:
                  attr: value
              - space_dedupe
              - trim
              - if:
                  match: (\S)
                  do:
                    - object_field_set:
                        object: product
                        field: variations
                        joinby: "|"
        # name: text of `.item-name`.
        - find:
            path: .item-name
            do:
              - parse
              - space_dedupe
              - trim
              - object_field_set:
                  object: product
                  field: name
        # price: number captured after a leading "$"; currency: literal USD.
        - find:
            path: .productPrice
            do:
              - parse:
                  filter:
                    - ^\s*\$\s*(\d+\.?\d*)
              - if:
                  match: (\d+)
                  do:
                    - object_field_set:
                        object: product
                        field: price
                        type: float
                    # NOTE(review): the currency commands are assumed to sit
                    # inside this `if`; they may belong one level up.
                    - register_set: USD
                    - object_field_set:
                        object: product
                        field: currency
        # images: take the `thumbsAndStuff` value from the inline script,
        # convert it with json2xml, then request the scene7 proxy once per
        # `alts` node and store one image URL per `item > i > n` node.
        - find:
            path: script:matches(variantMatrices)
            do:
              - parse:
                  filter:
                    - \/\/var\s*thumbsAndStuff\s*=\s*(.+);\s*
              - normalize:
                  routine: json2xml
              - to_block
              - find:
                  path: alts
                  do:
                    - parse
                    - walk:
                        to: http://www.betseyjohnson.com/scene7_proxy.jsp?cb=&req=set,json,utf-8&id=<%register%>
                        do:
                          - find:
                              path: body
                              do:
                                - parse
                                # Strip the JSONP wrapper text so the rest can
                                # go through json2xml.
                                - normalize:
                                    routine: replace_substring
                                    args:
                                      - \/\*jsonp\*\/\s*: ''
                                      - s7jsonResponse\(: ''
                                      - \}\}\}\,\"\"\)\;: '}}}'
                                - normalize:
                                    routine: unescape_html
                                - normalize:
                                    routine: json2xml
                                - to_block
                                - find:
                                    path: item > i > n
                                    do:
                                      - parse
                                      - register_set: http://s7d9.scene7.com/is/image/<%register%>?scl=1
                                      - object_field_set:
                                          object: product
                                          field: images
                                          joinby: "|"
        # description: text of `.detailsWrap > p`.
        - find:
            path: .detailsWrap > p
            do:
              - parse
              - space_dedupe
              - trim
              - object_field_set:
                  object: product
                  field: description
        # brand: content attribute of meta[itemprop="brand"].
        - find:
            path: meta[itemprop="brand"]
            do:
              - parse:
                  attr: content
              - space_dedupe
              - trim
              - object_field_set:
                  object: product
                  field: brand
        - object_save:
            name: product
# Advertisement
# Add Comment
# Please, Sign In to add comment
# Advertisement