Advertisement
Guest User

Untitled

a guest
Jul 26th, 2017
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.18 KB | None | 0 0
  {
    "name": "yardbarker",
    "base_url": "",
    "start_urls": [
      "http://www.yardbarker.com"
    ],
    "rules": [
      {
        "link_extractor": {
          "allow": "http:\/\/www\\.yardbarker\\.com\/\\w+\/articles\/[\\w*]*\/[\\w*]*$",
          "allow_domains": null,
          "deny_extensions": null,
          "restrict_xpaths": null,
          "restrict_css": null,
          "tags": null,
          "attrs": null
        },
        "follow": true,
        "callback": "parse_article"
      },
      {
        "link_extractor": {
          "allow": null,
          "deny": null,
          "allow_domains": "www.yardbarker.com",
          "deny_extensions": null,
          "restrict_xpaths": null,
          "restrict_css": null,
          "tags": null,
          "attrs": null
        },
        "follow": true
      }
    ],
    "document_selectors": {
      "title": {
        "xpath": [
          "//div[@class='article_left_column']/div[@class='art_headline']/h1/text()"
        ]
      },
      "text": {
        "xpath": [
          "//div[@class='article_left_column']/div[@class='art_body_article']/div[contains(@class,'froala-view')]/div[@class='article_chunk']/p/text()"
        ]
      },
      "categories": {
        "xpath": [
          "//div[@id='nav']/ul[@class='first']/li[contains(@class,'preselected')]/a/text()"
        ]
      }
    }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement