Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
---
# Scraper configuration for the anntaylor.com product catalogue.
#
# NOTE(review): this file was recovered from a paste in which every line had
# lost its indentation and gained a stray "- " prefix. The nesting below was
# reconstructed from how the commands use each other's output (which variable
# is set where, which selectors only make sense inside a converted block).
# Please confirm the structure against a known-good copy before relying on it.
#
# The flow has three stages:
#   1. Walk the home page navigation and queue one JSON listing URL per
#      category page into the `catalog` pool.
#   2. Walk the `catalog` pool and queue every product URL into `pages`.
#   3. Walk the `pages` pool and build/save one `product` object per page.
config:
    debug: 2
do:
# ---------------------------------------------------------------------------
# Stage 1: discover categories and their listing pages.
# ---------------------------------------------------------------------------
- pool_clear: pages
- walk:
    to: https://www.anntaylor.com
    do:
    - find:
        path: nav.sub-nav a
        do:
        # Reset per-category state so values from the previous navigation
        # link cannot leak into this one.
        - variable_clear: did
        - variable_set:
            field: viewsnum
            value: 0
        # The category id is carried in the link's data-id attribute.
        - parse:
            attr: data-id
        - variable_set: did
        # Request page 1 of the category listing only to learn how many
        # pages are available.
        # The query string previously read "country=US¤cy=USD": "&curren" had
        # been decoded as the HTML entity for the currency sign. Restored to
        # "&currency=USD".
        - walk:
            to: https://www.anntaylor.com/ecws/endecaService.jsp?SortByFacetSelectedValue=remove&DocSortOrder=remove&format=json&catid=<%did%>&question=&fRequest=true&goToPage=1&N=0&categoryType=regular&priceSort=DESC&country=US&currency=USD&Submit=Submit
            do:
            - find:
                path: resultslist>pagination>attributes>pagesavailable
                do:
                - parse
                - variable_set: viewsnum
        # Build "<div>N,N-1,...,1</div>" from the page count. When the count
        # is still 0 the JS loop does not run and the div is empty.
        # NOTE(review): placed at the navigation-link level (after the walk)
        # because viewsnum is pre-initialised to 0 there; confirm.
        - eval:
            routine: js
            body: (function(){var num = <%viewsnum%>;var str = "";for(var i = num; i > 0; i--){if (i != num){str += ","}str += i} return "<div>"+str+"</div>";})();
        - to_block
        # Presumably splits the div's text on "," so that each page number
        # ends up in its own div for the find below - TODO confirm.
        - split:
            context: text
            delimiter: ","
        - find:
            path: div
            do:
            - variable_clear: pagenum
            - parse
            - variable_set: pagenum
            # Queue the listing URL for this category/page combination.
            # Same "&currency=USD" repair as in the walk above.
            - link_add:
                url: https://www.anntaylor.com/ecws/endecaService.jsp?SortByFacetSelectedValue=remove&DocSortOrder=remove&format=json&catid=<%did%>&question=&fRequest=true&goToPage=<%pagenum%>&N=0&categoryType=regular&priceSort=DESC&country=US&currency=USD&Submit=Submit
                pool: catalog
# ---------------------------------------------------------------------------
# Stage 2: collect product page URLs from every listing page.
# ---------------------------------------------------------------------------
- walk:
    to: links
    pool: catalog
    do:
    - find:
        path: resultslist>records>records>attributes>quicklookurl
        do:
        # Keep only the part of the URL before the first "?".
        - parse:
            filter: ^([^\?]+)
        - normalize:
            routine: url
        - link_add:
            pool: pages
# ---------------------------------------------------------------------------
# Stage 3: scrape each product page into a `product` object.
# ---------------------------------------------------------------------------
- walk:
    to: links
    pool: pages
    do:
    # Pause between product pages (presumably seconds - TODO confirm unit).
    - sleep: 3
    - find:
        path: main
        do:
        - variable_clear: pid
        - object_new: product
        # date: current timestamp in ISO 8601 form, produced by the JS below.
        - eval:
            routine: js
            body: '(function (){var d = new Date(); return d.toISOString()})();'
        - object_field_set:
            object: product
            field: date
        # url: taken from the static `url` value.
        - static_get: url
        - object_field_set:
            object: product
            field: url
        # name: product title from the page heading.
        - find:
            path: h1[itemprop="name"]
            do:
            - parse
            - space_dedupe
            - trim
            - object_field_set:
                object: product
                field: name
        # brand: fixed value.
        - register_set: Ann Taylor
        - object_field_set:
            object: product
            field: brand
        # The remaining fields come from the inline `window.productSettings`
        # JSON, which is converted to XML so it can be queried with selectors.
        - find:
            in: doc
            path: script:contains("window.productSettings = ")
            do:
            - parse:
                filter: window\.productSettings\s+=\s+(.+)\s*$
            - normalize:
                routine: json2xml
            - to_block
            # currency: the source value uses "$"; store it as "USD".
            - find:
                path: body_safe>currency
                do:
                - parse
                - normalize:
                    routine: replace_matched
                    args:
                        \$: USD
                - object_field_set:
                    object: product
                    field: currency
            # price: list price, stored as a float.
            - find:
                path: body_safe>products>listprice
                do:
                - parse
                - object_field_set:
                    object: product
                    type: float
                    field: price
            # sku: product id; also kept in `pid` for the image request below.
            - find:
                path: body_safe>prodid
                do:
                - parse
                - space_dedupe
                - trim
                - variable_set: pid
                - object_field_set:
                    object: product
                    field: sku
            # variations: non-empty colour names, joined with "|".
            - find:
                path: body_safe>products>skucolors>colors
                do:
                - find:
                    path: colorname
                    do:
                    - parse
                    - space_dedupe
                    - trim
                    - if:
                        match: \w+
                        do:
                        - object_field_set:
                            object: product
                            joinby: "|"
                            field: variations
            # images: fetched from the rich-media service for this product id,
            # with the "&recipeId=<digits>" part removed from each path.
            # NOTE(review): kept inside this block because the finds that
            # follow still query body_safe>...; confirm placement.
            - walk:
                to: https://richmedia.channeladvisor.com/ViewerDelivery/productXmlService?profileid=52000652&itemid=<%pid%>&viewerid=196
                do:
                - find:
                    path: img
                    do:
                    - parse:
                        attr: path
                    - normalize:
                        routine: replace_substring
                        args:
                            \&recipeId\=\d+: ''
                    - object_field_set:
                        object: product
                        joinby: "|"
                        field: images
            # description: long web description text.
            - find:
                path: body_safe>products>weblongdescription
                do:
                - parse
                - space_dedupe
                - trim
                - object_field_set:
                    object: product
                    field: description
            # category: non-empty parent category name(s), joined with "|".
            - find:
                path: body_safe>products>parentcategoryname
                do:
                - parse
                - space_dedupe
                - trim
                - if:
                    match: \w+
                    do:
                    - object_field_set:
                        object: product
                        joinby: "|"
                        field: category
        # NOTE(review): saved at the same level where the object was created;
        # confirm against the original if available.
        - object_save:
            name: product
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement