Advertisement
Guest User

Untitled

a guest
Jul 24th, 2017
52
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.88 KB | None | 0 0
  {
    "name": "jalopnik.com",
    "base_url": "",
    "start_urls": [
      "http://www.jalopnik.com/"
    ],
    "rules": [
      {
        "link_extractor": {
          "allow": "jalopnik.com/.*-\\d+",
          "deny": null,
          "allow_domains": null,
          "deny_extensions": null,
          "restrict_xpaths": null,
          "restrict_css": null,
          "tags": null,
          "attrs": null
        },
        "follow": true,
        "callback": "parse_article"
      },
      {
        "link_extractor": {
          "allow": "jalopnik.com",
          "deny": null,
          "allow_domains": "jalopnik.com",
          "deny_extensions": null,
          "restrict_xpaths": null,
          "restrict_css": null,
          "tags": null,
          "attrs": null
        },
        "follow": true
      }
    ],
    "document_selectors": {
      "title": {
        "xpath": [
          "//h1[contains(@class, 'title')]/*[string()]"
        ]
      },
      "text": {
        "xpath": [
          "//div[contains(@class, 'post-content')]/*[string()]"
        ]
      }
    }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement