Advertisement
Guest User

Untitled

a guest
Oct 27th, 2014
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.00 KB | None | 0 0
  1. import logging
  2. import os
  3.  
  4. from grab.spider import Spider, Task
  5.  
  6.  
  7. class JavaGamesSpider(Spider):
  8.     initial_urls = ['http://dc.sever.ru/?d=19']
  9.  
  10.     def task_initial(self, grab, task):
  11.         for elem in grab.doc.select('/html/body/div[3]//dt/a'):
  12.             name = ' '.join(elem.text().split()[:-1])
  13.  
  14.             dir_name = './data/' + name
  15.             if not os.path.exists(dir_name):
  16.                 os.makedirs(dir_name)
  17.  
  18.             yield Task('dir', url='http://dc.sever.ru' + elem.attr('href'), dir=name)
  19.  
  20.     def task_dir(self, grab, task):
  21.         for elem in grab.doc.select('/html/body/div[3]//dt/a'):
  22.             name = ' '.join(elem.text().split()[:-1])
  23.  
  24.             dir_name = './data/' + task.dir + '/' + name
  25.             if not os.path.exists(dir_name):
  26.                 os.makedirs(dir_name)
  27.  
  28.             yield Task('category', url='http://dc.sever.ru' + elem.attr('href'), dir=name, pre_dir=task.dir)
  29.  
  30.     def task_category(self, grab, task):
  31.         for elem in grab.doc.select('/html/body/div[3]//dt/a'):
  32.             name = ' '.join(elem.text().split()[:-1])
  33.  
  34.             dir_name = './data/' + task.pre_dir + '/' + task.dir + '/' + name
  35.             if not os.path.exists(dir_name):
  36.                 os.makedirs(dir_name)
  37.  
  38.             yield Task('files', url='http://dc.sever.ru' + elem.attr('href'), path=dir_name)
  39.  
  40.     def task_files(self, grab, task):
  41.         for elem in grab.doc.select('/html/body/div[4]/dl//a'):
  42.             name = elem.text()
  43.  
  44.             dir_name = task.path + '/' + name
  45.             if not os.path.exists(dir_name):
  46.                 os.makedirs(dir_name)
  47.  
  48.             yield Task('file', url='http://dc.sever.ru' + elem.attr('href'), dir=dir_name)
  49.  
  50.         url_next = grab.doc.select('/html/body/div[5]/a')
  51.         if url_next.exists():
  52.             for link in url_next:
  53.                 if link.text() == 'Далее->':
  54.                     yield Task('files', url='http://dc.sever.ru' + link.attr('href'), path=task.path)
  55.  
  56.     def task_file(self, grab, task):
  57.         images = grab.doc.select('/html/body/div[2]/a[starts-with(@href, "index")]')
  58.         end_index = 1
  59.         if images.exists():
  60.             for item in images:
  61.                 end_index = item.text()
  62.  
  63.         image = grab.doc.select('/html/body/div[2]/img[2]')
  64.         if image.exists():
  65.             fid = task.url.split('=')[2].replace('&order', '')
  66.             for index in range(1, int(end_index) + 1):
  67.                 n = index - 1
  68.                 path = task.dir + '/screenshot_%d.jpg' % n
  69.                 if not os.path.exists(path):
  70.                     yield Task('screen', url='http://dc.sever.ru/scr.php?c=%s&s=%d' % (fid, n), path=task.dir, nid=n)
  71.  
  72.         files = grab.doc.select('/html/body/div[3]/a[starts-with(@href, "/get")]')
  73.         if files.exists():
  74.             for file in files:
  75.                 file_type = file.text().replace('Скачать(', '').replace(')', '').lower()
  76.                 path = task.dir + '/game.%s' % file_type
  77.                 if not os.path.exists(path):
  78.                     yield Task('game', url='http://dc.sever.ru' + file.attr('href'), path=task.dir, type=file_type)
  79.  
  80.         description = grab.doc.select('/html/body/div[2]')
  81.         description = description.text().split('Описание:')[1]
  82.  
  83.         stop_words = ['От Sever.Ru:', 'Рейтинг (max10):', 'Разработчик:', 'Версия:', 'Год:', 'Язык:', 'Добавлено:']
  84.         for item in stop_words:
  85.             description = description.split(item)[0]
  86.  
  87.         f = open(task.dir + '/description.txt', 'w')
  88.         f.write(description.strip())
  89.         f.close()
  90.  
  91.     def task_game(self, grab, task):
  92.         path = task.path + '/game.%s' % task.type
  93.         grab.response.save(path)
  94.  
  95.     def task_screen(self, grab, task):
  96.         path = task.path + '/screenshot_%d.jpg' % task.nid
  97.         grab.response.save(path)
  98.  
  99. if __name__ == '__main__':
  100.     logging.basicConfig(level=logging.DEBUG)
  101.     bot = JavaGamesSpider(thread_number=2)
  102.     bot.run()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement