# Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
---
# Scraper configuration that starts from the ASOS (US) women's and men's
# "A to Z of brands" index pages, follows brand listing pages and their
# pagination, and saves one `product` object per product page.
#
# NOTE(review): this file was reconstructed from a paste that had lost its
# indentation. Nesting follows each `do:` / `args:` / `filter:` opener; the
# places where the level had to be inferred are marked NOTE(review) below.
config:
  debug: 2
  agent: Firefox
do:
# Seed the default link pool with the two brand index pages.
- link_add:
    url:
    - http://us.asos.com/women/a-to-z-of-brands/cat/?cid=1340
    - http://us.asos.com/men/a-to-z-of-brands/cat/?cid=1361
- walk:
    to: links
    do:
    # The `main` pool is emptied for every index page before it is refilled
    # below, which is why the `main` walk is nested in this `do:` list.
    - pool_clear: main
    # Every brand link: take its href, substitute '&pgesize=204' at the end
    # of the string, and queue it in the `main` pool.
    - find:
        path: .brands-list a
        do:
        - parse:
            attr: href
        - normalize:
            routine: replace_substring
            args:
              '\s*$': '&pgesize=204'
        - link_add:
            pool: main
    # NOTE(review): nesting of this walk inside the index walk is inferred
    # from the `pool_clear: main` above - confirm against the original.
    - walk:
        to: links
        pool: main
        do:
        # Pagination: the "next" link goes back into the pool being walked.
        - find:
            path: li.next a
            do:
            - parse:
                attr: href
            - normalize:
                routine: url
            - link_add:
                pool: main
        # Product links found on the listing page go to the `sub` pool.
        - find:
            path: .product
            do:
            - parse:
                attr: href
            - link_add:
                pool: sub
        # NOTE(review): placement of this walk inside the `main` walk is
        # inferred - confirm against the original.
        - walk:
            to: links
            pool: sub
            do:
            # Reset per-page variables. Only `isP` and `json` are set again
            # further down in this file.
            - variable_clear: isP
            - variable_clear: allli
            - variable_clear: sdescr
            - variable_clear: li
            - variable_clear: json
            - variable_clear: id
            # A script matching "Pages/FullProduct" marks the page as a
            # product page (isP = 1); the argument of its view('...') call is
            # captured, cleaned up and stored in the `json` variable.
            - find:
                path: script:matches(Pages/FullProduct)
                do:
                - variable_set:
                    field: isP
                    value: 1
                - parse:
                    filter:
                    - view\(\'\s*(.+)\',
                # Pattern is backslash + apostrophe; replaced with nothing.
                - normalize:
                    routine: replace_substring
                    args:
                      \\\': ''
                - normalize:
                    routine: unescape_html
                - variable_set: json
            - find:
                path: html
                do:
                # Build a product only when the flag above was set.
                - variable_get: isP
                - if:
                    match: (1)
                    do:
                    - object_new: product
                    - find:
                        path: head
                        do:
                        # Current timestamp in ISO 8601 form -> `date`.
                        - eval:
                            routine: js
                            body: '(function (){var d = new Date(); return d.toISOString()})();'
                        - object_field_set:
                            object: product
                            field: date
                        - static_get: url
                        - object_field_set:
                            object: product
                            field: url
                    # Breadcrumb items (slice 1:-2), with the "A To Z Of
                    # Brands" text removed; non-empty values are joined
                    # with "|" into `category`.
                    - find:
                        path: '#breadcrumb li'
                        slice: '1:-2'
                        do:
                        - parse
                        - space_dedupe
                        - trim
                        - normalize:
                            routine: replace_matched
                            args:
                              'A\s*To\s*Z\s*Of\s*Brands': ''
                        - if:
                            match: (\S)
                            do:
                            - object_field_set:
                                object: product
                                field: category
                                joinby: "|"
                    - find:
                        path: .product-code > span
                        do:
                        - parse
                        - space_dedupe
                        - trim
                        - if:
                            match: (\S)
                            do:
                            - object_field_set:
                                object: product
                                field: sku
                    - find:
                        path: meta[name="description"]
                        do:
                        - parse:
                            attr: content
                        - space_dedupe
                        - trim
                        - if:
                            match: (\S)
                            do:
                            - object_field_set:
                                object: product
                                field: description
                    # Load the captured JSON, convert it with `json2xml` and
                    # make it the current block; the finds nested below use
                    # element names (brandname, images, price, ...) instead
                    # of HTML selectors.
                    - find:
                        path: body
                        do:
                        - variable_get: json
                        # Pattern is a doubled backslash; replaced with a
                        # single backslash.
                        - normalize:
                            routine: replace_substring
                            args:
                              '\\\\': '\'
                        - normalize:
                            routine: json2xml
                        - to_block
                        - find:
                            path: brandname
                            do:
                            - parse
                            - space_dedupe
                            - trim
                            - if:
                                match: (\S)
                                do:
                                - object_field_set:
                                    object: product
                                    field: brand
                        - find:
                            path: body_safe > name
                            do:
                            - parse
                            - space_dedupe
                            - trim
                            - if:
                                match: (\S)
                                do:
                                - object_field_set:
                                    object: product
                                    field: name
                        - find:
                            path: images
                            do:
                            # Colour names -> `variations`, joined with "|".
                            - find:
                                path: colour
                                do:
                                - parse
                                - space_dedupe
                                - trim
                                - if:
                                    match: (\S)
                                    do:
                                    - object_field_set:
                                        object: product
                                        field: variations
                                        joinby: "|"
                            # Image URLs get '?scl=1' substituted at the end
                            # of the string, then are joined with "|".
                            - find:
                                path: url
                                do:
                                - parse
                                - space_dedupe
                                - trim
                                - if:
                                    match: (\S)
                                    do:
                                    - normalize:
                                        routine: replace_substring
                                        args:
                                          '\s*$': '?scl=1'
                                    - object_field_set:
                                        object: product
                                        field: images
                                        joinby: "|"
                        - find:
                            path: price > current
                            do:
                            - parse:
                                filter:
                                - (\d+\.?\d*)
                            - if:
                                match: (\d)
                                do:
                                - object_field_set:
                                    object: product
                                    field: price
                                    type: float
                    # NOTE(review): the level of the currency step could not
                    # be recovered from the paste. It is placed here so the
                    # fixed value USD is written for every product - confirm
                    # it was not meant to sit inside the price `if`.
                    - register_set: USD
                    - object_field_set:
                        object: product
                        field: currency
                    - object_save:
                        name: product
# Advertisement
# Add Comment
# Please, Sign In to add comment