Advertisement
Guest User

newpct.py

a guest
Nov 11th, 2017
125
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 18.38 KB | None | 0 0
  1. # coding=utf-8
  2. # Author: CristianBB
  3. # Greetings to Mr. Pine-apple
  4. # URL: https://sickrage.github.io
  5. #
  6. # This file is part of SickRage.
  7. #
  8. # SickRage is free software: you can redistribute it and/or modify
  9. # it under the terms of the GNU General Public License as published by
  10. # the Free Software Foundation, either version 3 of the License, or
  11. # (at your option) any later version.
  12. #
  13. # SickRage is distributed in the hope that it will be useful,
  14. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  15. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  16. # GNU General Public License for more details.
  17. #
  18. # You should have received a copy of the GNU General Public License
  19. # along with SickRage. If not, see <http://www.gnu.org/licenses/>.
  20.  
  21. from __future__ import print_function, unicode_literals
  22.  
  23. import re
  24.  
  25. from requests.compat import urljoin
  26.  
  27. from sickbeard import helpers, logger, tvcache
  28. from sickbeard.bs4_parser import BS4Parser
  29. from sickrage.helper.common import convert_size
  30. from sickbeard.show_name_helpers import allPossibleShowNames
  31. from sickrage.providers.torrent.TorrentProvider import TorrentProvider
  32.  
  33.  
  34. class newpctProvider(TorrentProvider):
  35.  
  36. def __init__(self):
  37.  
  38. TorrentProvider.__init__(self, 'Newpct')
  39.  
  40. self.onlyspasearch = None
  41.  
  42. self.url = 'http://www.newpct.com'
  43. self.urls = {'search': [ urljoin(self.url, '/series'),
  44. urljoin(self.url, '/series-hd')],
  45. #urljoin(self.url, '/series-vo')],
  46. 'rss':urljoin(self.url, '/feed'),
  47. 'download': 'http://tumejorserie.com/descargar/index.php?link=torrents/%s.torrent',}
  48.  
  49. self.cache = tvcache.TVCache(self, min_time=20)
  50.  
  51. def _get_season_search_strings(self, ep_obj):
  52. search_string = {'Season': []}
  53.  
  54. for show_name in set(allPossibleShowNames(ep_obj.show)):
  55. search_string['Season'].append(show_name)
  56.  
  57. return [search_string]
  58.  
  59. def _get_episode_search_strings(self, ep_obj, add_string=''):
  60. search_string = {'Episode': []}
  61.  
  62. for show_name in set(allPossibleShowNames(ep_obj.show)):
  63. search_string['Episode'].append(show_name)
  64.  
  65. return [search_string]
  66.  
  67. def search(self, search_strings, age=0, ep_obj=None): # pylint: disable=too-many-locals
  68. """
  69. Search query:
  70. http://www.newpct.com/index.php?l=doSearch&q=fringe&category_=All&idioma_=1&bus_de_=All
  71. q => Show name
  72. category_ = Category 'Shows' (767)
  73. idioma_ = Language Spanish (1), All
  74. bus_de_ = Date from (All, mes, semana, ayer, hoy)
  75. """
  76. results = []
  77.  
  78. # Only search if user conditions are true
  79. lang_info = '' if not ep_obj or not ep_obj.show else ep_obj.show.lang
  80.  
  81. for mode in search_strings:
  82. items = []
  83. logger.log('Search Mode: {0}'.format(mode), logger.DEBUG)
  84.  
  85. if mode == 'RSS':
  86.  
  87. data = self.cache.get_rss_feed(self.urls['rss'], params=None)['entries']
  88. if not data:
  89. logger.log('Data returned from provider does not contain any torrents', logger.DEBUG)
  90. continue
  91.  
  92. for curItem in data:
  93. try:
  94.  
  95. title = curItem['title'].decode('utf8')
  96. download_url = curItem['link']
  97. if not all([title, download_url]):
  98. continue
  99.  
  100. title = self._processTitle(title, None, download_url)
  101. result = {'title': title, 'link': download_url}
  102.  
  103. items.append(result)
  104. except StandardError:
  105. continue
  106.  
  107. else:
  108.  
  109. # Only search if user conditions are true
  110. if self.onlyspasearch and lang_info != 'es':
  111. logger.log('Show info is not spanish, skipping provider search', logger.DEBUG)
  112. continue
  113.  
  114. for series_name in search_strings[mode]:
  115. search_name = re.sub(r'[ \.\(\)]', '-', series_name, flags=re.I)
  116. search_names = [search_name]
  117. search_name = re.sub(r'-+', '-', search_name, flags=re.I)
  118. if not search_name in search_names:
  119. search_names.append(search_name)
  120.  
  121. for search_name in search_names:
  122. for search_url in self.urls['search']:
  123. pg = 1
  124. while True:
  125. url = search_url + '/' + search_name + '//pg/' + str(pg)
  126.  
  127. try:
  128. data = self.get_url(url, params=None, returns='text')
  129. items = self.parse(series_name, data, mode)
  130. if not len(items):
  131. break
  132. results += items
  133. except Exception:
  134. logger.log('No data returned from provider', logger.DEBUG)
  135. break
  136.  
  137. pg += 1
  138.  
  139. results += items
  140.  
  141. return results
  142.  
  143. def parse(self, series_name, data, mode):
  144. """
  145. Parse search results for items.
  146.  
  147. :param data: The raw response from a search
  148. :param mode: The current mode used to search, e.g. RSS
  149.  
  150. :return: A list of items found
  151. """
  152.  
  153. results = []
  154.  
  155. with BS4Parser(data) as html:
  156. torrent_table = html.find('ul', class_='buscar-list')
  157. torrent_rows = torrent_table('li') if torrent_table else []
  158.  
  159. # Continue only if at least one release is found
  160. if not len(torrent_rows):
  161. sickrage.app.srLogger.debug('Data returned from provider does not contain any torrents')
  162. return results
  163.  
  164. for row in torrent_rows:
  165. try:
  166. torrent_anchor = row.find_all('a')[1]
  167. title = torrent_anchor.get_text()
  168. download_url = torrent_anchor.get('href', '')
  169.  
  170. if not all([title, download_url]):
  171. continue
  172.  
  173. row_spans = row.find_all('span')
  174. size = convert_size(row_spans[-2].get_text().strip()) if row_spans and len(row_spans) >= 2 else 0
  175. seeders = 1 # Provider does not provide seeders
  176. leechers = 0 # Provider does not provide leechers
  177.  
  178. title = self._processTitle(title, series_name, download_url)
  179.  
  180. logger.log('Found: {0} # Size {1}'.format(title, size), logger.DEBUG)
  181.  
  182. item = {
  183. 'title': title,
  184. 'link': download_url,
  185. 'size': size,
  186. 'seeders': seeders,
  187. 'leechers': leechers,
  188. }
  189.  
  190. results.append(item)
  191.  
  192. except (AttributeError, TypeError):
  193. continue
  194.  
  195. return results
  196.  
  197.  
  198. def get_url(self, url, post_data=None, params=None, timeout=30, **kwargs): # pylint: disable=too-many-arguments
  199. """
  200. returns='content' when trying access to torrent info (For calling torrent client). Previously we must parse
  201. the URL to get torrent file
  202. """
  203. trickery = kwargs.pop('returns', '')
  204. if trickery == 'content':
  205. kwargs['returns'] = 'text'
  206. data = super(newpctProvider, self).get_url(url, post_data=post_data, params=params, timeout=timeout, **kwargs)
  207.  
  208. download_id = re.search(r'http://tumejorserie.com/descargar/.+?(\d{6}).+?\.html', data, re.DOTALL).group(1)
  209. url = self.urls['download'] % download_id
  210.  
  211. kwargs['returns'] = trickery
  212. return super(newpctProvider, self).get_url(url, post_data=post_data, params=params,
  213. timeout=timeout, **kwargs)
  214.  
  215. def download_result(self, result):
  216. """
  217. Save the result to disk.
  218. """
  219.  
  220. # check for auth
  221. if not self.login():
  222. return False
  223.  
  224. urls, filename = self._make_url(result)
  225.  
  226. for url in urls:
  227. # Search results don't return torrent files directly, it returns show sheets so we must parse showSheet to access torrent.
  228. data = self.get_url(url, returns='text')
  229.  
  230. download_id = re.search(r'http://tumejorserie.com/descargar/.+?(\d{6}).+?\.html', data, re.DOTALL).group(1)
  231. url_torrent = self.urls['download'] % download_id
  232.  
  233. if url_torrent.startswith('http'):
  234. self.headers.update({'Referer': '/'.join(url_torrent.split('/')[:3]) + '/'})
  235.  
  236. logger.log('Downloading a result from {0}'.format(url))
  237.  
  238. if helpers.download_file(url_torrent, filename, session=self.session, headers=self.headers):
  239. if self._verify_download(filename):
  240. logger.log('Saved result to {0}'.format(filename), logger.INFO)
  241. return True
  242. else:
  243. logger.log('Could not download {0}'.format(url), logger.WARNING)
  244. helpers.remove_file_failed(filename)
  245.  
  246. if urls:
  247. logger.log('Failed to download any results', logger.WARNING)
  248.  
  249. return False
  250.  
    def _processTitle(self, title, series_name, url, try_download = True):
        """Normalise an inconsistent Newpct release title into a scene-style title.

        :param title: raw title from the RSS feed, a listing page, or the details page
        :param series_name: known show name (may be None/empty)
        :param url: download-page URL, used to infer name and quality when the title lacks them
        :param try_download: when True and no format matches, fetch the download
            page once and retry with its title (recursion is bounded to one level)
        :return: cleaned-up, scene-formatted title string
        """
        # Newpct titles are very very very inconsistent.

        # Check if title is well formatted (RSS titles usually are)
        # Examples:
        # FooSeries - Temporada 2 [HDTV 720p][Cap.204][AC3 5.1 Español Castellano]
        # Salvation - Temporada 1 [HDTV][Cap.104-107][Español Castellano]

        # else try to match list format
        # example
        # Serie Juego De Tronos Temporada 7 Capitulo 5 - Español Castellano Calidad [ HDTV ]
        # Serie Juego De Tronos Temporada [7] Capitulo [5] - Español Castellano Calidad [ HDTV ]

        # else process download page title
        # else compose from download url

        series_name = series_name or ""

        logger.log('newpct _processTitle: {} # series_name {} # url {}'.format(title, series_name, url), logger.DEBUG)

        #clean spaces
        title = self._clean_spaces(title)
        series_name = self._clean_spaces(series_name)

        # One regex per title source described above.
        title_stdformat = r'.+-.+\d{1,2}.+\[Cap.\d{2,4}([\-\_]\d{2,4})?\]'
        title_listformat = r'Serie ?(.+?) ?-? ?Temporada ?\[?(\d+)\]?.*Capitulos? ?\[?(\d+)\]? ?(al ?\[?(\d+)\]?)?.*- ?(.*) ?Calidad ?(.+)'
        title_urlformat = r'.*\/(.*)\/capitulo-(\d{2,4})\/'

        title_is_proper = re.search(r'\b(proper|repack)', title, flags=re.I)

        stdformat_match = re.search(title_stdformat, title, flags=re.I)
        if not stdformat_match:
            #Try to match list format
            listformat_match = re.search(title_listformat, title, flags=re.I)
            if listformat_match:
                if series_name:
                    name = series_name + ((' (' + title_is_proper.group() + ')') if title_is_proper else "")
                else:
                    name = self._clean_spaces(listformat_match.group(1))
                season = self._clean_spaces(listformat_match.group(2))
                episode = self._clean_spaces(listformat_match.group(3)).zfill(2)
                audioquality = self._clean_spaces(listformat_match.group(6))  # NOTE(review): captured but never used below
                quality = self._clean_spaces(listformat_match.group(7))

                if not listformat_match.group(5):
                    # Single episode: rebuild in the standard [Cap.SSEE] form.
                    title = "{0} - Temporada {1} {2} [Cap.{3}{4}]".format(name, season, quality, season, episode)
                else:
                    # Episode range: [Cap.SSEE_SSEE].
                    episode_to = self._clean_spaces(listformat_match.group(5)).zfill(2)
                    title = "{0} - Temporada {1} {2} [Cap.{3}{4}_{5}{6}]".format(name, season, quality, season, episode, season, episode_to)
                logger.log('_processTitle: Matched by listFormat: {}'.format(title), logger.DEBUG)
            else:
                if try_download:
                    # Get title from the download page
                    try:
                        data = self.get_url(url, params=None, returns='text')
                        with BS4Parser(data) as details:
                            title = details.find('h1').get_text().split('/')[1]
                            logger.log('_processTitle: Title got from details page: {}'.format(title), logger.DEBUG)
                            # Retry once with try_download=False so we cannot recurse forever.
                            return self._processTitle(title, series_name, url, False)
                    except (AttributeError, TypeError):
                        # NOTE(review): the rest of this file logs via
                        # logger.log(msg, logger.LEVEL); confirm sickbeard.logger
                        # actually exposes an error() function.
                        logger.error('title could not be retrived')
                else:
                    # Try to compose title from url
                    url_match = re.search(title_urlformat, url, flags=re.I)
                    if url_match:
                        name = series_name if series_name else url_match.group(1).replace('-', ' ')
                        season, episode = self._process_season_episode(url_match.group(2))
                        title = '{} - Temporada {} [][Cap.{}{}]'.format(name, season, season, episode)
                        logger.log('_processTitle: Matched by url: {}'.format(title), logger.DEBUG)
        else:
            logger.log('_processTitle: Matched by stdFormat: {}'.format(title), logger.DEBUG)

        # Quality - Use re module to avoid case sensitive problems with replace
        title = re.sub(r'\[HDTV 1080p?[^\[]*]', '1080p HDTV x264', title, flags=re.I)
        title = re.sub(r'\[HDTV 720p?[^\[]*]', '720p HDTV x264', title, flags=re.I)
        title = re.sub(r'\[ALTA DEFINICION 720p?[^\[]*]', '720p HDTV x264', title, flags=re.I)
        title = re.sub(r'\[HDTV]', 'HDTV x264', title, flags=re.I)
        title = re.sub(r'\[DVD[^\[]*]', 'DVDrip x264', title, flags=re.I)
        title = re.sub(r'\[BluRay 1080p?[^\[]*]', '1080p BluRay x264', title, flags=re.I)
        title = re.sub(r'\[BluRay Rip 1080p?[^\[]*]', '1080p BluRay x264', title, flags=re.I)
        title = re.sub(r'\[BluRay Rip 720p?[^\[]*]', '720p BluRay x264', title, flags=re.I)
        title = re.sub(r'\[BluRay MicroHD[^\[]*]', '1080p BluRay x264', title, flags=re.I)
        title = re.sub(r'\[MicroHD 1080p?[^\[]*]', '1080p BluRay x264', title, flags=re.I)
        title = re.sub(r'\[BLuRay[^\[]*]', '720p BluRay x264', title, flags=re.I)
        title = re.sub(r'\[BRrip[^\[]*]', '720p BluRay x264', title, flags=re.I)
        title = re.sub(r'\[BDrip[^\[]*]', '720p BluRay x264', title, flags=re.I)

        #detect hdtv/bluray by url
        #hdtv 1080p example url: http://www.newpct.com/descargar-seriehd/foo/capitulo-610/hdtv-1080p-ac3-5-1/
        #hdtv 720p example url: http://www.newpct.com/descargar-seriehd/foo/capitulo-26/hdtv-720p-ac3-5-1/
        #hdtv example url: http://www.newpct.com/descargar-serie/foo/capitulo-214/hdtv/
        #bluray compilation example url: http://www.newpct.com/descargar-seriehd/foo/capitulo-11/bluray-1080p/
        title_hdtv = re.search(r'HDTV', title, flags=re.I)
        title_720p = re.search(r'720p', title, flags=re.I)
        title_1080p = re.search(r'1080p', title, flags=re.I)
        title_x264 = re.search(r'x264', title, flags=re.I)
        title_bluray = re.search(r'bluray', title, flags=re.I)
        title_vo = re.search(r'\[V.O.[^\[]*]', title, flags=re.I)
        url_hdtv = re.search(r'HDTV', url, flags=re.I)
        url_720p = re.search(r'720p', url, flags=re.I)
        url_1080p = re.search(r'1080p', url, flags=re.I)
        url_bluray = re.search(r'bluray', url, flags=re.I)
        url_serie_hd = re.search(r'descargar\-seriehd', url, flags=re.I)
        url_serie_vo = re.search(r'descargar\-serievo', url, flags=re.I)

        # Append quality tokens found in the URL but missing from the title.
        if not title_hdtv and url_hdtv:
            title += ' HDTV'
            if not title_x264:
                title += ' x264'
        if not title_bluray and url_bluray:
            title += ' BluRay'
            if not title_x264:
                title += ' x264'
        if not title_1080p and url_1080p:
            title += ' 1080p'
            title_1080p = True
        if not title_720p and url_720p:
            title += ' 720p'
            title_720p = True
        if not (title_720p or title_1080p) and url_serie_hd:
            # HD section implies at least 720p when no resolution was detected.
            title += ' 720p'
        if not (title_vo) and url_serie_vo:
            title += ' [V.O.]'
            title_vo = True

        # Language
        # title = re.sub(r'\[Spanish[^\[]*]', 'SPANISH AUDIO', title, flags=re.I)
        # title = re.sub(r'\[Castellano[^\[]*]', 'SPANISH AUDIO', title, flags=re.I)
        # title = re.sub(ur'\[Espa\u00f1ol[^\[]*]', 'SPANISH AUDIO', title, flags=re.I)
        # title = re.sub(ur'\[Espa\u00f1ol Castellano[^\[]*]', 'SPANISH AUDIO', title, flags=re.I)
        # title = re.sub(r'\[AC3 5\.1[^\[]*]', 'SPANISH AUDIO', title, flags=re.I)
        # title = re.sub(ur'\[AC3 5\.1 Espa\u00f1ol[^\[]*]', 'SPANISH AUDIO', title, flags=re.I)
        # title = re.sub(ur'\[AC3 5\.1 Espa\u00f1ol Castellano[^\[]*]', 'SPANISH AUDIO', title, flags=re.I)

        if title_vo:
            title += ' -NEWPCTVO'
        else:
            title += ' -SPANISH AUDIO'
            title += ' -NEWPCT'

        #propers handling
        title = re.sub(r'\(?proper\)?', '-PROPER', title, flags=re.I)
        title = re.sub(r'\(?repack\)?', '-REPACK', title, flags=re.I)

        return self._clean_spaces(title)
  397.  
  398. def _process_season_episode(self, season_episode):
  399.  
  400. match = re.search(r'(\d)(\d{1,2})', season_episode, flags=re.I)
  401. if not match:
  402. match = re.search(r'(\d{2})(\d{2})', season_episode, flags=re.I)
  403.  
  404. season = match.group(1)
  405. episode = match.group(2).zfill(2)
  406.  
  407. return season, episode
  408.  
  409. def _clean_spaces(self, value):
  410.  
  411. value = value.strip()
  412. value = re.sub(r'[ ]+', ' ', value, flags=re.I)
  413. value = re.sub(r'\[[ ]+', '[', value, flags=re.I)
  414. value = re.sub(r'[ ]+\]', ']', value, flags=re.I)
  415. value = re.sub(r'\([ ]+', '(', value, flags=re.I)
  416. value = re.sub(r'[ ]+\)', ')', value, flags=re.I)
  417.  
  418. return value
  419.  
# Module-level instance picked up by SickRage's provider discovery.
provider = newpctProvider()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement