Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
---
# Web-scraper configuration for the healthdata.gov "hospital" topic search.
# NOTE(review): the command names (link_add, walk, find, parse, ...) look like
# the Diggernaut meta-language -- confirm the consuming tool.
#
# Shape of the job, as written below:
#   1. Put the search-results URL into link pool `c`.
#   2. Walk the links in pool `c`; on each results page:
#        a. for every dataset link, open it and build one `item` object;
#        b. look at the pager and queue another page into pool `c`.
config:
  agent: Firefox
  debug: 2

do:
  # Seed pool `c` with the first search-results page (sorted by last change).
  - link_add:
      pool: c
      url: 'https://www.healthdata.gov/search/field_topic/hospital-17821?query=&sort_by=changed&sort_order=DESC'

  - walk:
      to: links
      pool: c
      do:
        # --- Dataset links on the results page -------------------------------
        - find:
            path: .search-result-dataset > h2 > a
            do:
              # Read the link target, normalize it with the `url` routine
              # (presumably makes it absolute -- confirm), then walk to it.
              - parse:
                  attr: href
              - normalize:
                  routine: url
              - walk:
                  to: value
                  do:
                    - find:
                        path: body
                        do:
                          # One `item` object per dataset page.
                          - object_new: item

                          # Dataset description block.
                          - find:
                              path: 'div[property="content:encoded"]'
                              do:
                                - parse
                                - object_field_set:
                                    object: item
                                    field: description

                          # Table 1: rows are <th> label / <td> value pairs.
                          # The <th> content is kept in variable `header` and
                          # then used as the field name for the <td> content.
                          - find:
                              path: table.field-group-format.group_additional
                              do:
                                - find:
                                    path: tbody > tr
                                    do:
                                      - find:
                                          path: th
                                          do:
                                            - parse
                                            # Clear first so the previous
                                            # row's label is not carried over.
                                            - variable_clear: header
                                            - variable_set: header
                                      - find:
                                          path: td
                                          do:
                                            - parse
                                            - object_field_set:
                                                object: item
                                                field: '<%header%>'

                          # Table 2: rows have no <th>; the first <td>
                          # (slice 0) is the label, the second (slice 1) the
                          # value.
                          - find:
                              path: .table-responsive > table
                              do:
                                - find:
                                    path: tbody > tr
                                    do:
                                      - find:
                                          path: td
                                          slice: 0
                                          do:
                                            - parse
                                            - variable_clear: header
                                            - variable_set: header
                                      - find:
                                          path: td
                                          slice: 1
                                          do:
                                            - parse
                                            - object_field_set:
                                                object: item
                                                field: '<%header%>'

                          # Emit the finished object.
                          - object_save:
                              name: item

        # --- Pagination ------------------------------------------------------
        - find:
            path: .pagination.pager
            do:
              - parse
              - if:
                  # The parsed pager content is tested against "next".
                  match: 'next'
                  do:
                    # A next page exists: queue its URL into pool `c`.
                    - find:
                        path: .pager-next > a
                        do:
                          - parse:
                              attr: href
                          - normalize:
                              routine: url
                          - link_add:
                              pool: c
                  else:
                    # No "next" in the pager: clear pool `c`, then queue the
                    # "first page" link again.
                    # NOTE(review): this looks like it restarts the crawl from
                    # page one -- confirm that is intended and that the tool
                    # does not loop forever here.
                    - pool_clear: c
                    - find:
                        path: .pager-first.first > a
                        do:
                          - parse:
                              attr: href
                          - normalize:
                              routine: url
                          - link_add:
                              pool: c
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement