Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
---
# Scraper configuration for https://www.anthropologie.com.
#
# Flow, as written in the steps below:
#   1. Open the home page and iterate the level-1 main-navigation entries,
#      remembering the lower-cased label in `cat1`.
#   2. For every level-2 entry remember its label in `cat2`; for every
#      level-3 link remember its label in `cat3`, flag `subcat`, and crawl
#      the linked listing.
#   3. If a level-2 entry produced no level-3 link (`subcat` not set), crawl
#      the level-2 link itself instead.
#   4. On listing pages, queue the "next" pagination link into pool `main`
#      and open every product tile.
#   5. On product pages, build and save a `product` object (date, brand, url,
#      price, currency, images, variations, sku, name, description,
#      category).
#
# NOTE(review): this file was recovered from a paste that had lost all
# indentation. The nesting below was reconstructed from the list markers and
# the command semantics; places where the original nesting could not be
# determined with certainty carry their own NOTE(review) comment.
# NOTE(review): the command vocabulary looks like the Diggernaut
# meta-language - confirm against the engine that consumes this file.
# NOTE(review): the product-page extraction is duplicated verbatim in the
# level-3 branch and in the level-2 fallback branch; keep both in sync.
config:
  debug: 2
  agent: Firefox
do:
- walk:
    to: https://www.anthropologie.com
    do:
    - find:
        path: .c-main-navigation__li--level-1
        do:
        # Level-1 label -> cat1 (first <span> only).
        - find:
            path: span
            slice: 0
            do:
            - parse
            - space_dedupe
            - trim
            - normalize:
                routine: lower
            - variable_set: cat1
        - find:
            path: .c-main-navigation__li--level-2
            do:
            # `subcat` records whether this level-2 entry has any level-3
            # link; it is tested after the level-3 loop below.
            - variable_clear: subcat
            # Fix: cat3 is only ever set inside the level-3 loop. Without
            # this reset, a level-2 entry that has no level-3 links would
            # tag its products with the cat3 left over from a previous
            # level-2 entry.
            - variable_clear: cat3
            # Level-2 label -> cat2.
            - find:
                path: .c-main-navigation__a--level-2
                do:
                - parse
                - space_dedupe
                - trim
                - normalize:
                    routine: lower
                - variable_set: cat2
            # Level-3 links: label -> cat3, then crawl the listing.
            - find:
                path: .c-main-navigation__li--level-3 a
                do:
                - parse
                - space_dedupe
                - trim
                - normalize:
                    routine: lower
                - variable_set: cat3
                - variable_set:
                    field: subcat
                    value: 1
                # Queue only this category's listing URL in pool `main`.
                - parse:
                    attr: href
                - pool_clear: main
                - link_add:
                    pool: main
                - walk:
                    to: links
                    pool: main
                    do:
                    # Pagination: queue the "next" page into the same pool.
                    - find:
                        path: .js-pagination__arrow--next
                        slice: 0
                        do:
                        - parse:
                            attr: href
                        - link_add:
                            pool: main
                    # Product tiles: take the href (query string stripped
                    # when present, whole value otherwise) and open it.
                    - find:
                        path: .c-product-tile__image-link
                        do:
                        - parse:
                            attr: href
                            filter:
                            - (.+)\?
                            - (.+)
                        - normalize:
                            routine: url
                        - walk:
                            to: value
                            do:
                            - find:
                                path: body
                                do:
                                - object_new: product
                                # Scrape timestamp as an ISO-8601 string.
                                - eval:
                                    routine: js
                                    body: '(function (){var d = new Date(); return d.toISOString()})();'
                                - object_field_set:
                                    object: product
                                    field: date
                                # Default brand; set again further down when
                                # the product data carries a brand element.
                                - register_set: Anthropologie
                                - object_field_set:
                                    object: product
                                    field: brand
                                - static_get: url
                                - object_field_set:
                                    object: product
                                    field: url
                                - find:
                                    path: meta[property="product:price:amount"]
                                    do:
                                    - parse:
                                        attr: content
                                    # NOTE(review): currency is assumed to be
                                    # set only when a price digit matched;
                                    # the flattened source does not show
                                    # whether it sat inside this `if`.
                                    - if:
                                        match: (\d)
                                        do:
                                        - object_field_set:
                                            object: product
                                            field: price
                                            type: float
                                        - register_set: USD
                                        - object_field_set:
                                            object: product
                                            field: currency
                                # Carousel images, query string stripped.
                                - find:
                                    path: .o-carousel__flex-wrapper > img.c-product-image
                                    do:
                                    - parse:
                                        attr: src
                                        filter:
                                        - (.+)\?
                                        - (.+)
                                    - normalize:
                                        routine: url
                                    - object_field_set:
                                        object: product
                                        field: images
                                        joinby: "|"
                                # Embedded window.productData payload: decode
                                # it, convert to XML and query it as a block.
                                - find:
                                    path: script:matches(window\.productData)
                                    do:
                                    - parse:
                                        filter:
                                        - window.productData\s*=\s*\'\s*(.+)\s*\'\s*;
                                    - normalize:
                                        routine: Base64ZLIBDecode
                                    - normalize:
                                        routine: json2xml
                                    - to_block
                                    - find:
                                        path: body_safe
                                        do:
                                        - find:
                                            path: primaryslice:hasChild(displaylabel:matches(Color))
                                            do:
                                            # Colour names -> variations.
                                            - find:
                                                path: sliceitems > displayname
                                                do:
                                                - parse
                                                - space_dedupe
                                                - trim
                                                - object_field_set:
                                                    object: product
                                                    field: variations
                                                    joinby: "|"
                                            # Per-colour images: URL built
                                            # from the slice item id and each
                                            # image value.
                                            - find:
                                                path: sliceitems
                                                do:
                                                - variable_clear: iid
                                                - find:
                                                    path: id
                                                    slice: 0
                                                    do:
                                                    - parse
                                                    - variable_set: iid
                                                - find:
                                                    path: images
                                                    do:
                                                    - parse
                                                    - register_set: http://images.anthropologie.com/is/image/Anthropologie/<%iid%>_<%register%>
                                                    - object_field_set:
                                                        object: product
                                                        field: images
                                                        joinby: "|"
                                        - find:
                                            path: product > stylenumber
                                            slice: 0
                                            do:
                                            - parse
                                            - space_dedupe
                                            - trim
                                            - object_field_set:
                                                object: product
                                                field: sku
                                        - find:
                                            path: product > product > brand
                                            do:
                                            - parse
                                            - space_dedupe
                                            - trim
                                            - object_field_set:
                                                object: product
                                                field: brand
                                        - find:
                                            path: product > product > displayname
                                            do:
                                            - parse
                                            - space_dedupe
                                            - trim
                                            - object_field_set:
                                                object: product
                                                field: name
                                        - find:
                                            path: product > product > longdescription
                                            do:
                                            - parse
                                            - space_dedupe
                                            - trim
                                            - object_field_set:
                                                object: product
                                                field: description
                                # Category path: every non-blank cat1..cat3
                                # is appended, joined with "|".
                                # NOTE(review): these steps and object_save
                                # are assumed to run at the same level as
                                # object_new - confirm.
                                - variable_get: cat1
                                - if:
                                    match: (\S)
                                    do:
                                    - object_field_set:
                                        object: product
                                        field: category
                                        joinby: "|"
                                - variable_get: cat2
                                - if:
                                    match: (\S)
                                    do:
                                    - object_field_set:
                                        object: product
                                        field: category
                                        joinby: "|"
                                - variable_get: cat3
                                - if:
                                    match: (\S)
                                    do:
                                    - object_field_set:
                                        object: product
                                        field: category
                                        joinby: "|"
                                - object_save:
                                    name: product
            # Fallback: no level-3 link set `subcat`, so crawl the level-2
            # link itself (the `if` has only an `else` branch).
            - variable_get: subcat
            - if:
                match: (1)
                else:
                - find:
                    path: .c-main-navigation__a--level-2
                    do:
                    - parse:
                        attr: href
                    - pool_clear: main
                    - link_add:
                        pool: main
                    - walk:
                        to: links
                        pool: main
                        do:
                        # Pagination: queue the "next" page into the pool.
                        - find:
                            path: .js-pagination__arrow--next
                            slice: 0
                            do:
                            - parse:
                                attr: href
                            - link_add:
                                pool: main
                        # Product tiles: same handling as the level-3 branch.
                        - find:
                            path: .c-product-tile__image-link
                            do:
                            - parse:
                                attr: href
                                filter:
                                - (.+)\?
                                - (.+)
                            - normalize:
                                routine: url
                            - walk:
                                to: value
                                do:
                                - find:
                                    path: body
                                    do:
                                    - object_new: product
                                    # Scrape timestamp as an ISO-8601 string.
                                    - eval:
                                        routine: js
                                        body: '(function (){var d = new Date(); return d.toISOString()})();'
                                    - object_field_set:
                                        object: product
                                        field: date
                                    # Default brand; set again further down
                                    # when the product data carries a brand
                                    # element.
                                    - register_set: Anthropologie
                                    - object_field_set:
                                        object: product
                                        field: brand
                                    - static_get: url
                                    - object_field_set:
                                        object: product
                                        field: url
                                    - find:
                                        path: meta[property="product:price:amount"]
                                        do:
                                        - parse:
                                            attr: content
                                        # NOTE(review): currency is assumed
                                        # to be set only when a price digit
                                        # matched - confirm.
                                        - if:
                                            match: (\d)
                                            do:
                                            - object_field_set:
                                                object: product
                                                field: price
                                                type: float
                                            - register_set: USD
                                            - object_field_set:
                                                object: product
                                                field: currency
                                    # Carousel images, query string stripped.
                                    - find:
                                        path: .o-carousel__flex-wrapper > img.c-product-image
                                        do:
                                        - parse:
                                            attr: src
                                            filter:
                                            - (.+)\?
                                            - (.+)
                                        - normalize:
                                            routine: url
                                        - object_field_set:
                                            object: product
                                            field: images
                                            joinby: "|"
                                    # Embedded window.productData payload:
                                    # decode it, convert to XML and query it
                                    # as a block.
                                    - find:
                                        path: script:matches(window\.productData)
                                        do:
                                        - parse:
                                            filter:
                                            - window.productData\s*=\s*\'\s*(.+)\s*\'\s*;
                                        - normalize:
                                            routine: Base64ZLIBDecode
                                        - normalize:
                                            routine: json2xml
                                        - to_block
                                        - find:
                                            path: body_safe
                                            do:
                                            - find:
                                                path: primaryslice:hasChild(displaylabel:matches(Color))
                                                do:
                                                # Colour names -> variations.
                                                - find:
                                                    path: sliceitems > displayname
                                                    do:
                                                    - parse
                                                    - space_dedupe
                                                    - trim
                                                    - object_field_set:
                                                        object: product
                                                        field: variations
                                                        joinby: "|"
                                                # Per-colour images: URL
                                                # built from the slice item
                                                # id and each image value.
                                                - find:
                                                    path: sliceitems
                                                    do:
                                                    - variable_clear: iid
                                                    - find:
                                                        path: id
                                                        slice: 0
                                                        do:
                                                        - parse
                                                        - variable_set: iid
                                                    - find:
                                                        path: images
                                                        do:
                                                        - parse
                                                        - register_set: http://images.anthropologie.com/is/image/Anthropologie/<%iid%>_<%register%>
                                                        - object_field_set:
                                                            object: product
                                                            field: images
                                                            joinby: "|"
                                            - find:
                                                path: product > stylenumber
                                                slice: 0
                                                do:
                                                - parse
                                                - space_dedupe
                                                - trim
                                                - object_field_set:
                                                    object: product
                                                    field: sku
                                            - find:
                                                path: product > product > brand
                                                do:
                                                - parse
                                                - space_dedupe
                                                - trim
                                                - object_field_set:
                                                    object: product
                                                    field: brand
                                            - find:
                                                path: product > product > displayname
                                                do:
                                                - parse
                                                - space_dedupe
                                                - trim
                                                - object_field_set:
                                                    object: product
                                                    field: name
                                            - find:
                                                path: product > product > longdescription
                                                do:
                                                - parse
                                                - space_dedupe
                                                - trim
                                                - object_field_set:
                                                    object: product
                                                    field: description
                                    # Category path: every non-blank
                                    # cat1..cat3 is appended, joined with
                                    # "|". cat3 is blank here because it was
                                    # cleared at the top of the level-2 loop.
                                    # NOTE(review): these steps and
                                    # object_save are assumed to run at the
                                    # same level as object_new - confirm.
                                    - variable_get: cat1
                                    - if:
                                        match: (\S)
                                        do:
                                        - object_field_set:
                                            object: product
                                            field: category
                                            joinby: "|"
                                    - variable_get: cat2
                                    - if:
                                        match: (\S)
                                        do:
                                        - object_field_set:
                                            object: product
                                            field: category
                                            joinby: "|"
                                    - variable_get: cat3
                                    - if:
                                        match: (\S)
                                        do:
                                        - object_field_set:
                                            object: product
                                            field: category
                                            joinby: "|"
                                    - object_save:
                                        name: product
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement