Advertisement
Guest User

Untitled

a guest
Jan 20th, 2017
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.79 KB | None | 0 0
  1. ## b3 p0lit3
  2. USER_AGENT = ' *companyname* TUTORIAL BOT - (*myemail*) | No content Generated will be used - For Educational Purpose'
  3. DOWNLOAD_DELAY = 5.0
  4. AUTOTHROTTLE_ENABLED = True
  5. HTTPCACHE_ENABLED = True
  6.  
  7. BOT_NAME = 'flaticontest'
  8.  
  9. SPIDER_MODULES = ['flaticontest.spiders']
  10. NEWSPIDER_MODULE = 'flaticontest.spiders'
  11. IMAGES_STORE = '/home/scriptso/Desktop/flattetstn1'
  12. ROBOTSTXT_OBEY = True
  13.  
  14. ITEM_PIPELINES = {'scrapy.pipelines.images.ImagesPipeline': 1}
  15.  
  16. import scrapy
  17.  
  18.  
  19. class FlaticontestItem(scrapy.Item):
  20. images = scrapy.Field()
  21. image_urls = scrapy.Field()
  22. title = scrapy.Field()
  23. pachName = scrapy.Field()
  24. image_name = scrapy.Field()
  25.  
  26. from scrapy.contrib.pipeline.images import ImagesPipeline
  27. class FlaticontestPipeline(object):
  28. def process_item(self, item, spider):
  29. return item
  30.  
  31. class CustomImageNamePipeline(ImagesPipeline):
  32.  
  33. def get_media_requests(self, item, info):
  34. return [Request(x, meta={'image_name': item["image_name"]})
  35. for x in item.get('image_urls', [])]
  36.  
  37. def file_path(self, request, response=None, info=None):
  38. return '%s.jpg' % request.meta['image_name']
  39.  
  40. import scrapy
  41. from flaticontest.items import FlaticontestItem
  42.  
  43. class FltspiSpider(scrapy.Spider):
  44. name = "fltSpi"
  45. allowed_domains = ["flaticon.com"]
  46. start_urls = []
  47.  
  48. for num in range(1,2000):
  49. start_urls.append("http://www.flaticon.com/free-icons/computing_23394/" + str(num))
  50.  
  51. def parse(self, response):
  52. for icon in response.css('.icon'):
  53. yield {
  54. 'title': icon.css('img').re('title="(.*?)"'),
  55. 'image_urls': icon.css('img').re('set="(.*?) 4x'),
  56. 'pach-name': icon.css('li').re('data-pack="(.*)" '),
  57. 'image_name': icon.css('img').re('title="(.*?)"'),
  58. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement