Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
---
# Scraper configuration for bedbathandbeyond.com product pages.
#
# NOTE(review): this document was reconstructed from a paste in which all
# indentation had been lost and every line carried a stray leading "- ".
# Nesting below follows the command structure (each `do:` owns the commands
# that operate on the block selected by its parent); confirm against the
# scraper's own documentation before running.

config:
  debug: 2
  agent: Firefox
  # Replace with your own proxy, e.g. 1.1.1.1:8888. Left unset (null) here,
  # exactly as in the original where the value was only a placeholder comment.
  proxy: null

do:
  # Stage 1: seed the default link pool with the navigation JSON feeds.
  - link_add:
      url:
        - https://www.bedbathandbeyond.com/__ssobj/static/giftsNavOutHol.json?v=8
        - https://www.bedbathandbeyond.com/__ssobj/static/personalizedgiftsNavOutHol.json?v=8
        - https://www.bedbathandbeyond.com/__ssobj/static/beddingNavOutHol.json?v=8
        - https://www.bedbathandbeyond.com/__ssobj/static/bathNavOutHol.json?v=8
        - https://www.bedbathandbeyond.com/__ssobj/static/kitchenNavOutHol.json?v=8
        - https://www.bedbathandbeyond.com/__ssobj/static/diningNavOutHol.json?v=8
        - https://www.bedbathandbeyond.com/__ssobj/static/homedecorNavOutHol.json?v=8
        - https://www.bedbathandbeyond.com/__ssobj/static/furnitureNavOutHol.json?v=8
        - https://www.bedbathandbeyond.com/__ssobj/static/storagecleaningNavOutHol.json?v=8
        - https://www.bedbathandbeyond.com/__ssobj/static/outdoorNavOutHol.json?v=8
        - https://www.bedbathandbeyond.com/__ssobj/static/babykidsNavOutHol.json?v=8
        - https://www.bedbathandbeyond.com/__ssobj/static/healthbeautyNavOutHol.json?v=8
        - https://www.bedbathandbeyond.com/__ssobj/static/moreNavOutHol.json?v=8
        - https://www.bedbathandbeyond.com/__ssobj/static/shopsNavOutHol.json?v=8

  # Stage 2: walk the seeded feeds and push every l2url/l3url value (query
  # string removed by the filter) into the `catalog` pool.
  - walk:
      to: links
      do:
        - find:
            path: 'l2url,l3url'
            do:
              - parse:
                  filter: '^([^\?]+)'
              - trim
              - if:
                  match: '\w+'
                  do:
                    - normalize:
                        routine: url
                    - link_add:
                        pool: catalog

  # Stage 3: walk the `catalog` pool; queue further listing pages back into
  # `catalog` and product links into `pages`.
  - walk:
      to: links
      pool: catalog
      do:
        # When the invalid-request marker is present, switch proxy and reload.
        - find:
            path: 'span#ctl00_InvalidRequest'
            do:
              - proxy_switch
              - page_reload
        # Pagination: the "next page" link goes back into the same pool.
        - find:
            path: 'li.lnkNextPage>a'
            do:
              - parse:
                  attr: href
              - trim
              - if:
                  match: '\w+'
                  do:
                    - normalize:
                        routine: url
                    - link_add:
                        pool: catalog
        # Product links (query string removed) go to the `pages` pool.
        - find:
            path: 'a.prodImg'
            do:
              - parse:
                  attr: href
                  filter: '^([^\?]+)'
              - trim
              - if:
                  match: '\w+'
                  do:
                    - normalize:
                        routine: url
                    - link_add:
                        pool: pages

  # Stage 4: walk the `pages` pool (mode: unique) and build one `product`
  # object per page.
  - walk:
      to: links
      pool: pages
      mode: unique
      do:
        # When the invalid-request marker is present, switch proxy and reload.
        - find:
            path: 'span#ctl00_InvalidRequest'
            do:
              - proxy_switch
              - page_reload
        # If the marker is still found after the reload above, exit.
        - find:
            path: 'span#ctl00_InvalidRequest'
            do:
              - exit
        - find:
            path: 'div#content'
            do:
              - variable_clear: pid
              # Default brand; overwritten below when the page states one.
              - variable_set:
                  field: brand
                  value: BedBathAndBeyond
              - object_new: product

              # date: current timestamp in ISO 8601, produced by the JS snippet.
              - eval:
                  routine: js
                  body: '(function (){var d = new Date(); return d.toISOString()})();'
              - object_field_set:
                  object: product
                  field: date

              # url: taken from the static `url` value.
              - static_get: url
              - object_field_set:
                  object: product
                  field: url

              # name
              - find:
                  path: 'h1#productTitle'
                  do:
                    - parse
                    - space_dedupe
                    - trim
                    - object_field_set:
                        object: product
                        field: name

              # brand
              - find:
                  path: 'div[itemprop="brand"] span[itemprop="name"]'
                  do:
                    - parse
                    - space_dedupe
                    - trim
                    - variable_set: brand
              # NOTE(review): placed outside the find above so the default
              # brand is used when no brand element matches — the flattened
              # original does not show which level this was on; confirm.
              - variable_get: brand
              - object_field_set:
                  object: product
                  field: brand

              # sku: digits from the first p.prodSKU element.
              - find:
                  path: 'p.prodSKU'
                  slice: 0
                  do:
                    - parse:
                        filter: '(\d+)'
                    - space_dedupe
                    - trim
                    - object_field_set:
                        object: product
                        field: sku

              # currency: "$" is replaced with "USD".
              - find:
                  path: 'span[itemprop="priceCurrency"]'
                  do:
                    - parse
                    - normalize:
                        routine: replace_matched
                        # NOTE(review): kept as a mapping, as written in the
                        # original; verify whether the tool expects a list.
                        args:
                          '\$': USD
                    - object_field_set:
                        object: product
                        field: currency

              # price: the filters are listed in this order — a number
              # followed by "-", then any number.
              - find:
                  path: 'span[itemprop="price"],span[itemprop="lowPrice"]'
                  do:
                    - parse:
                        filter:
                          - '([0-9\.]+)\s*-'
                          - '([0-9\.]+)'
                    - object_field_set:
                        object: product
                        type: float
                        field: price

              # variations + swatch images, one entry per colour swatch,
              # joined with "|".
              - find:
                  path: 'li.colorSwatchLi'
                  do:
                    - parse:
                        attr: data-attr
                    - space_dedupe
                    - trim
                    - if:
                        match: '\w+'
                        do:
                          - object_field_set:
                              object: product
                              joinby: '|'
                              field: variations
                    # NOTE(review): second parse kept at the swatch level (not
                    # inside the `if` above) so images are collected even when
                    # data-attr is empty — confirm against the original.
                    - parse:
                        attr: data-imgurlthumb
                        filter: '^([^\?]+)'
                    - space_dedupe
                    - trim
                    - if:
                        match: '\w+'
                        do:
                          - normalize:
                              routine: url
                          - variable_set: iurl
                          - register_set: '<%iurl%>?scl=1'
                          - object_field_set:
                              object: product
                              joinby: '|'
                              field: images

              # description
              - find:
                  path: 'div[itemprop="description"]'
                  do:
                    - parse
                    - space_dedupe
                    - trim
                    - if:
                        match: '\w+'
                        do:
                          - object_field_set:
                              object: product
                              field: description

              # category: breadcrumb links selected by slice 1:-1, joined
              # with "|".
              - find:
                  path: 'div.breadcrumbs>div.alpha>a'
                  slice: '1:-1'
                  do:
                    - parse
                    - space_dedupe
                    - trim
                    - if:
                        match: '\w+'
                        do:
                          - object_field_set:
                              object: product
                              joinby: '|'
                              field: category

              # images: main product image (query string replaced by ?scl=1).
              - find:
                  path: 'img#mainProductImg'
                  do:
                    - parse:
                        attr: src
                        filter: '^([^\?]+)'
                    - if:
                        match: '\w+'
                        do:
                          - normalize:
                              routine: url
                          - variable_set: iurl
                          - register_set: '<%iurl%>?scl=1'
                          - object_field_set:
                              object: product
                              joinby: '|'
                              field: images

              # images: additional images listed by the Scene7 image-set
              # endpoint for this product's image id.
              - find:
                  path: 'div#s7ProductImageWrapper'
                  do:
                    - parse:
                        attr: data-s7imageid
                    - if:
                        match: '\d+'
                        do:
                          - variable_set: iid
                          - walk:
                              to: 'https://s7d9.scene7.com/is/image/BedBathandBeyond/<%iid%>_is?req=set,json,UTF-8'
                              do:
                                - find:
                                    path: script
                                    do:
                                      # Extract the payload passed to
                                      # s7jsonResponse(...), then convert it
                                      # to XML so it can be queried below.
                                      - parse:
                                          filter: 's7jsonResponse\((.+)\,\"\"\)\;'
                                      - normalize:
                                          routine: unescape_html
                                      - normalize:
                                          routine: json2xml
                                      - to_block
                                      - find:
                                          path: 'item>i>n'
                                          do:
                                            - parse
                                            - if:
                                                match: '\d+'
                                                do:
                                                  - variable_set: iurl
                                                  - register_set: 'https://s7d9.scene7.com/is/image/<%iurl%>?scl=1'
                                                  - object_field_set:
                                                      object: product
                                                      joinby: '|'
                                                      field: images

              - object_save:
                  name: product
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement