# Pastebin page boilerplate captured with the paste (not part of the config):
# Advertisement
# Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
- ---
- config:
- debug: 2
- agent: Firefox
- do:
- - walk:
- to: http://www.alexandermcqueen.com/us/
- do:
- - find:
- path: ul.level-1>li
- do:
- - variable_clear: cat1
- - parse:
- attr: id
- - normalize:
- routine: replace_matched
- args:
- shop_womenswear: Womens
- shop_menswear: Mens
- .+: ''
- - variable_set: cat1
- - find:
- path: ul.level-2>li
- do:
- - variable_clear: cat2
- - find:
- path: a
- slice: 0
- do:
- - parse
- - space_dedupe
- - trim
- - variable_set: cat2
- - find:
- path: ul.level-3>li
- do:
- - variable_clear: cat3
- - find:
- path: a
- slice: 0
- do:
- - parse
- - space_dedupe
- - trim
- - variable_set: cat3
- - parse:
- attr: href
- - space_dedupe
- - trim
- - if:
- match: \w+
- do:
- - normalize:
- routine: url
- - walk:
- to: value
- do:
- - find:
- path: script:contains('yTos.navigation =')
- do:
- - parse:
- filter: yTos\.navigation\s+\=\s+(.+)\;
- - normalize:
- routine: json2xml
- - to_block
- - find:
- path: pathandqueryparsed:has(paramname:matches(^sitecode$))
- do:
- - find:
- path: paramvalue
- do:
- - parse
- - variable_set: sitecode
- - find:
- path: pathandqueryparsed:has(paramname:matches(^dept$))
- do:
- - find:
- path: paramvalue
- do:
- - parse
- - variable_set: department
- - find:
- path: pathandqueryparsed:has(paramname:matches(^season$))
- do:
- - find:
- path: paramvalue
- do:
- - parse
- - normalize:
- routine: replace_substring
- args:
- \,: "%2C"
- - variable_set: season
- - find:
- path: pathandqueryparsed:has(paramname:matches(^gender$))
- do:
- - find:
- path: paramvalue
- do:
- - parse
- - variable_set: gender
- - find:
- path: pathandqueryparsed:has(paramname:matches(^yurirulename$))
- do:
- - find:
- path: paramvalue
- do:
- - parse
- - variable_set: yurirulename
- - walk:
- to: http://www.alexandermcqueen.com/Search/RenderProducts?ytosQuery=true&department=<%department%>&gender=<%gender%>&season=<%season%>&yurirulename=<%yurirulename%>&page=1&productsPerPage=1000&suggestion=false&totalPages=1&partialLoadedItems=1000&siteCode=<%sitecode%>
- do:
- - find:
- path: article>a
- do:
- - parse:
- attr: href
- filter: ^([^\#]+)
- - walk:
- to: value
- do:
- - sleep: 2
- - find:
- path: article.item
- do:
- - variable_clear: pid
- - variable_clear: cid
- - object_new: product
- - eval:
- routine: js
- body: '(function (){var d = new Date(); return d.toISOString()})();'
- - object_field_set:
- object: product
- field: date
- - static_get: url
- - object_field_set:
- object: product
- field: url
- - register_set: Alexander McQueen
- - object_field_set:
- object: product
- field: brand
- - find:
- path: h2.modelName
- do:
- - parse
- - space_dedupe
- - trim
- - object_field_set:
- object: product
- field: name
- - find:
- in: doc
- path: meta[name="description"]
- do:
- - parse:
- attr: content
- - space_dedupe
- - trim
- - variable_set: desc
- - find:
- path: div.descriptionsContainer>div.EditorialDescription
- do:
- - parse
- - space_dedupe
- - trim
- - variable_set: desc
- - variable_get: desc
- - object_field_set:
- object: product
- field: description
- - find:
- path: div.itemPriceContainer span.price
- slice: 0
- do:
- - find:
- path: span.currency
- do:
- - parse
- - normalize:
- routine: replace_matched
- args:
- \$: USD
- - object_field_set:
- object: product
- field: currency
- - find:
- path: span.value
- do:
- - parse
- - normalize:
- routine: replace_substring
- args:
- - \,: ''
- - \s+: ''
- - object_field_set:
- object: product
- type: float
- field: price
- - find:
- path: div.modelFabricColor>span.value
- do:
- - parse
- - space_dedupe
- - trim
- - if:
- match: \w+
- do:
- - variable_set: pid
- - object_field_set:
- object: product
- field: sku
- - variable_get: cat1
- - if:
- match: \w{2,}
- do:
- - object_field_set:
- object: product
- joinby: "|"
- field: category
- - variable_get: cat2
- - if:
- match: \w{2,}
- do:
- - object_field_set:
- object: product
- joinby: "|"
- field: category
- - variable_get: cat3
- - if:
- match: \w{2,}
- do:
- - object_field_set:
- object: product
- joinby: "|"
- field: category
- - find:
- path: ul.alternativeImages>li>img
- do:
- - parse:
- attr: srcset
- - to_block
- - split:
- context: text
- delimiter: \,\s*
- - find:
- path: div.splitted
- slice: 0
- do:
- - parse:
- filter: ^([^\s]+)
- - object_field_set:
- object: product
- joinby: "|"
- field: images
- - find:
- path: div.selectColor
- slice: 0
- do:
- - variable_clear: cod10
- - find:
- in: doc
- path: script:contains("yTos.navigation.itemData =")
- do:
- - parse:
- filter: yTos\.navigation\.itemData\s+\=\s+(.+)\;
- - normalize:
- routine: json2xml
- - to_block
- - find:
- path: cod10
- do:
- - parse
- - space_dedupe
- - trim
- - variable_set: cod10
- - walk:
- to: http://www.alexandermcqueen.com/yTos/api/Plugins/ItemPluginApi/GetCombinationsAsync/?siteCode=<%sitecode%>&code10=<%cod10%>
- do:
- - find:
- path: body_safe>colors
- do:
- - find:
- path: description
- do:
- - parse
- - space_dedupe
- - trim
- - if:
- match: \w+
- do:
- - object_field_set:
- object: product
- joinby: "|"
- field: variations
- - object_save:
- name: product
# Pastebin page boilerplate captured with the paste (not part of the config):
# Advertisement
# Add Comment
# Please, Sign In to add comment
# Advertisement