newpct.py

# coding=utf-8
# Author: CristianBB
# Greetings to Mr. Pine-apple
# URL: https://sickrage.github.io
#
# This file is part of SickRage.
#
# SickRage is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# SickRage is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with SickRage. If not, see <http://www.gnu.org/licenses/>.

from __future__ import print_function, unicode_literals

import re

from requests.compat import urljoin

from sickbeard import helpers, logger, tvcache
from sickbeard.bs4_parser import BS4Parser
from sickbeard.show_name_helpers import allPossibleShowNames
from sickrage.helper.common import convert_size
from sickrage.providers.torrent.TorrentProvider import TorrentProvider


class newpctProvider(TorrentProvider):

    def __init__(self):

        TorrentProvider.__init__(self, 'Newpct')

        self.onlyspasearch = None

        self.url = 'http://www.newpct.com'
        self.urls = {
            'search': [urljoin(self.url, '/series'),
                       urljoin(self.url, '/series-hd')],
                       # urljoin(self.url, '/series-vo')],
            'rss': urljoin(self.url, '/feed'),
            'letter': [urljoin(self.url, '/series/letter/{0}'),
                       urljoin(self.url, '/series-hd/letter/{0}')],
                       # urljoin(self.url, '/series-vo/letter/{0}')],
            'downloadregex': r'[^\"]*/descargar-torrent/\d+_[^\"]*',
        }

        self.cache = tvcache.TVCache(self, min_time=20)

    def _get_season_search_strings(self, ep_obj):
        search_string = {'Season': []}

        for show_name in set(allPossibleShowNames(ep_obj.show)):
            search_string['Season'].append(show_name)

        return [search_string]

    def _get_episode_search_strings(self, ep_obj, add_string=''):
        search_string = {'Episode': []}

        for show_name in set(allPossibleShowNames(ep_obj.show)):
            search_string['Episode'].append(show_name)

        return [search_string]

    def search(self, search_strings, age=0, ep_obj=None):  # pylint: disable=too-many-locals

        results = []

        # Determine the show's language so the 'onlyspasearch' setting can be honored below
        lang_info = '' if not ep_obj or not ep_obj.show else ep_obj.show.lang

        for mode in search_strings:
            items = []
            logger.log('Search Mode: {0}'.format(mode), logger.DEBUG)

            if mode == 'RSS':

                data = self.cache.get_rss_feed(self.urls['rss'], params=None)['entries']
                if not data:
                    logger.log('Data returned from provider does not contain any torrents', logger.DEBUG)
                    continue

                for curItem in data:
                    try:
                        title = curItem['title'].decode('utf8')
                        download_url = curItem['link']
                        if not all([title, download_url]):
                            continue

                        title = self._processTitle(title, None, download_url)
                        result = {'title': title, 'link': download_url}

                        items.append(result)
                    except Exception:
                        continue

            else:

                # Only search if user conditions are true
                if self.onlyspasearch and lang_info != 'es':
                    logger.log('Show info is not spanish, skipping provider search', logger.DEBUG)
                    continue

                letters = []
                series_names_lower = [x.lower() for x in search_strings[mode]]

                # Collect the first letter of every series name to browse the letter index pages
                for series_name in series_names_lower:
                    name = series_name.strip()
                    if name and (name[0] not in letters):
                        letters.append(name[0])

                for letter in letters:
                    for letter_url in self.urls['letter']:
                        url = letter_url.format(letter) if not letter.isdigit() else letter_url.format('0-9')

                        try:
                            data = self.get_url(url, params=None, returns='text')
                            seriesparsed = self.parse_seriestitleurl(series_names_lower, data)
                            if not seriesparsed:
                                continue

                            for seriesparseditem in seriesparsed:
                                # Walk the paginated episode list for this series
                                pg = 1
                                while pg < 100:
                                    try:
                                        data = self.get_url(seriesparseditem['url'] + '/pg/' + str(pg), params=None, returns='text')
                                        # Use a separate variable so the RSS 'items' list is not
                                        # clobbered and re-added by the 'results += items' below
                                        page_items = self.parse(seriesparseditem['title'], data, mode)
                                        if not page_items:
                                            break
                                        results += page_items
                                    except Exception:
                                        logger.log('No data returned from provider', logger.DEBUG)
                                        break

                                    pg += 1

                        except Exception as e:
                            logger.log('No data returned from provider (letter) {0}'.format(str(e)), logger.DEBUG)
                            continue

            results += items

        return results
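
    # A hypothetical sketch of the input search() receives, for reference only
    # (the 'Season'/'Episode' dicts come from the helper methods above, and
    # 'RSS' mode is driven by the cache updater):
    #
    #   provider.search({'Episode': ['juego de tronos']})
    #   provider.search({'RSS': ['']})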

    def parse_seriestitleurl(self, series_names, data):
        """Parse a letter index page and return the series whose titles match series_names."""
        results = []

        with BS4Parser(data) as html:
            series_table = html.find('ul', class_='pelilist')
            series_rows = series_table('li') if series_table else []

            # Continue only if at least one series is found
            if not series_rows:
                return results

            for row in series_rows:
                try:
                    series_anchor = row.find_all('a')[0]
                    title = series_anchor.get('title', '').lower()
                    url = series_anchor.get('href', '')
                    if title and title in series_names:
                        item = {
                            'title': title,
                            'url': url,
                        }
                        results.append(item)
                except Exception:
                    continue

        return results
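
    # The index markup this method expects looks roughly like the following
    # (inferred from the selectors above; the real page may differ):
    #
    #   <ul class="pelilist">
    #     <li><a href="http://www.newpct.com/series/foo" title="foo">Foo</a></li>
    #     ...
    #   </ul>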

    def parse(self, series_name, data, mode):
        """
        Parse search results for items.

        :param series_name: The name of the series the results belong to
        :param data: The raw response from a search
        :param mode: The current mode used to search, e.g. RSS

        :return: A list of items found
        """

        results = []

        with BS4Parser(data) as html:
            torrent_table = html.find('ul', class_='buscar-list')
            torrent_rows = torrent_table('li') if torrent_table else []

            # Continue only if at least one release is found
            if not torrent_rows:
                logger.log('Data returned from provider does not contain any torrents', logger.DEBUG)
                return results

            for row in torrent_rows:
                try:
                    torrent_anchor = row.find_all('a')[1]
                    title = torrent_anchor.get_text()
                    download_url = torrent_anchor.get('href', '')

                    if not all([title, download_url]):
                        continue

                    row_spans = row.find_all('span')
                    size = convert_size(row_spans[-2].get_text().strip()) if row_spans and len(row_spans) >= 2 else 0
                    seeders = 1  # Provider does not provide seeders
                    leechers = 0  # Provider does not provide leechers

                    title = self._processTitle(title, series_name, download_url)

                    logger.log('Found: {0} # Size {1}'.format(title, size), logger.DEBUG)

                    item = {
                        'title': title,
                        'link': download_url,
                        'size': size,
                        'seeders': seeders,
                        'leechers': leechers,
                    }

                    results.append(item)

                # IndexError covers rows without a second anchor
                except (AttributeError, TypeError, IndexError):
                    continue

        return results
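
    # The markup parse() expects looks roughly like this (inferred from the
    # selectors above; the real page may differ). The second anchor holds the
    # release title and download page link, and the second-to-last span holds
    # the size:
    #
    #   <ul class="buscar-list">
    #     <li>
    #       <a href="...">...</a>
    #       <a href="http://www.newpct.com/descargar-serie/foo/capitulo-101/hdtv/">Foo - Temporada 1 [HDTV][Cap.101]</a>
    #       <span>...</span><span>450 MB</span><span>...</span>
    #     </li>
    #   </ul>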

    def get_url(self, url, post_data=None, params=None, timeout=30, **kwargs):  # pylint: disable=too-many-arguments
        """
        Use returns='content' when trying to reach the torrent itself (e.g. for a
        torrent client): search results point at a show sheet, not a torrent file,
        so the sheet must be fetched and parsed first to extract the torrent URL.
        """
        trickery = kwargs.pop('returns', '')
        if trickery == 'content':
            kwargs['returns'] = 'text'
            data = super(newpctProvider, self).get_url(url, post_data=post_data, params=params, timeout=timeout, **kwargs)
            if not data:
                return None

            match = re.search(self.urls['downloadregex'], data, re.DOTALL)
            if not match:
                return None
            url = match.group()

        kwargs['returns'] = trickery
        return super(newpctProvider, self).get_url(url, post_data=post_data, params=params,
                                                   timeout=timeout, **kwargs)
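
    # Usage sketch (hypothetical URL): asking for returns='content' makes this
    # override fetch the show sheet as text, pull the '/descargar-torrent/...'
    # link out with 'downloadregex', and only then fetch that link with the
    # caller's original 'returns' value:
    #
    #   torrent = provider.get_url(
    #       'http://www.newpct.com/descargar-serie/foo/capitulo-101/hdtv/',
    #       returns='content')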

    def download_result(self, result):
        """
        Save the result to disk.
        """

        # Check for auth
        if not self.login():
            return False

        urls, filename = self._make_url(result)

        for url in urls:
            # Search results don't return torrent files directly; they return
            # show sheets, so the sheet must be parsed to reach the torrent.
            data = self.get_url(url, returns='text')
            if not data:
                continue

            match = re.search(self.urls['downloadregex'], data, re.DOTALL)
            if not match:
                continue
            url_torrent = match.group()

            if url_torrent.startswith('http'):
                self.headers.update({'Referer': '/'.join(url_torrent.split('/')[:3]) + '/'})

            logger.log('Downloading a result from {0}'.format(url))

            if helpers.download_file(url_torrent, filename, session=self.session, headers=self.headers):
                if self._verify_download(filename):
                    logger.log('Saved result to {0}'.format(filename), logger.INFO)
                    return True
                else:
                    logger.log('Could not download {0}'.format(url), logger.WARNING)
                    helpers.remove_file_failed(filename)

        if urls:
            logger.log('Failed to download any results', logger.WARNING)

        return False
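
    # Worked example of the Referer computation above: for
    # 'http://www.newpct.com/descargar-torrent/123_foo', splitting on '/' and
    # keeping the first three parts yields ['http:', '', 'www.newpct.com'],
    # so the header becomes 'http://www.newpct.com/'.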

    def _processTitle(self, title, series_name, url, try_download=True):

        # Newpct titles are extremely inconsistent, so try several strategies in order:
        #
        # 1. Check if the title is well formatted (RSS titles usually are). Examples:
        #      FooSeries - Temporada 2 [HDTV 720p][Cap.204][AC3 5.1 Español Castellano]
        #      Salvation - Temporada 1 [HDTV][Cap.104-107][Español Castellano]
        #
        # 2. Else try to match the list format. Examples:
        #      Serie Juego De Tronos Temporada 7 Capitulo 5 - Español Castellano Calidad [ HDTV ]
        #      Serie Juego De Tronos Temporada [7] Capitulo [5] - Español Castellano Calidad [ HDTV ]
        #
        # 3. Else process the download page title.
        # 4. Else compose a title from the download URL.

        series_name = series_name or ''

        logger.log('newpct _processTitle: {} # series_name {} # url {}'.format(title, series_name, url), logger.DEBUG)

        # Clean spaces
        title = self._clean_spaces(title)
        series_name = self._clean_spaces(series_name)

        title_stdformat = r'.+-.+\d{1,2}.+\[Cap.\d{2,4}([\-\_]\d{2,4})?\]'
        title_listformat = r'Serie ?(.+?) ?-? ?Temporada ?\[?(\d+)\]?.*Capitulos? ?\[?(\d+)\]? ?(al ?\[?(\d+)\]?)?.*- ?(.*) ?Calidad ?(.+)'
        title_urlformat = r'.*\/(.*)\/capitulo-(\d{2,4})\/'

        title_is_proper = re.search(r'\b(proper|repack)', title, flags=re.I)

        stdformat_match = re.search(title_stdformat, title, flags=re.I)
        if not stdformat_match:
            # Try to match the list format
            listformat_match = re.search(title_listformat, title, flags=re.I)
            if listformat_match:
                if series_name:
                    name = series_name + ((' (' + title_is_proper.group() + ')') if title_is_proper else '')
                else:
                    name = self._clean_spaces(listformat_match.group(1))
                season = self._clean_spaces(listformat_match.group(2))
                episode = self._clean_spaces(listformat_match.group(3)).zfill(2)
                audioquality = self._clean_spaces(listformat_match.group(6))  # currently unused
                quality = self._clean_spaces(listformat_match.group(7))

                if not listformat_match.group(5):
                    title = '{0} - Temporada {1} {2} [Cap.{3}{4}]'.format(name, season, quality, season, episode)
                else:
                    episode_to = self._clean_spaces(listformat_match.group(5)).zfill(2)
                    title = '{0} - Temporada {1} {2} [Cap.{3}{4}_{5}{6}]'.format(name, season, quality, season, episode, season, episode_to)
                logger.log('_processTitle: Matched by listFormat: {}'.format(title), logger.DEBUG)
            else:
                if try_download:
                    # Get the title from the download page
                    try:
                        data = self.get_url(url, params=None, returns='text')
                        with BS4Parser(data) as details:
                            title = details.find('h1').get_text().split('/')[1]
                        logger.log('_processTitle: Title taken from details page: {}'.format(title), logger.DEBUG)
                        return self._processTitle(title, series_name, url, False)
                    except (AttributeError, TypeError):
                        logger.log('Title could not be retrieved', logger.ERROR)
                else:
                    # Try to compose a title from the URL
                    url_match = re.search(title_urlformat, url, flags=re.I)
                    if url_match:
                        name = series_name if series_name else url_match.group(1).replace('-', ' ')
                        season, episode = self._process_season_episode(url_match.group(2))
                        title = '{} - Temporada {} [][Cap.{}{}]'.format(name, season, season, episode)
                        logger.log('_processTitle: Matched by url: {}'.format(title), logger.DEBUG)
        else:
            logger.log('_processTitle: Matched by stdFormat: {}'.format(title), logger.DEBUG)

        # Quality - use the re module to avoid case-sensitivity problems with replace
        title = re.sub(r'\[HDTV 1080p?[^\[]*]', '1080p HDTV x264', title, flags=re.I)
        title = re.sub(r'\[HDTV 720p?[^\[]*]', '720p HDTV x264', title, flags=re.I)
        title = re.sub(r'\[ALTA DEFINICION 720p?[^\[]*]', '720p HDTV x264', title, flags=re.I)
        title = re.sub(r'\[HDTV]', 'HDTV x264', title, flags=re.I)
        title = re.sub(r'\[DVD[^\[]*]', 'DVDrip x264', title, flags=re.I)
        title = re.sub(r'\[BluRay 1080p?[^\[]*]', '1080p BluRay x264', title, flags=re.I)
        title = re.sub(r'\[BluRay Rip 1080p?[^\[]*]', '1080p BluRay x264', title, flags=re.I)
        title = re.sub(r'\[BluRay Rip 720p?[^\[]*]', '720p BluRay x264', title, flags=re.I)
        title = re.sub(r'\[BluRay MicroHD[^\[]*]', '1080p BluRay x264', title, flags=re.I)
        title = re.sub(r'\[MicroHD 1080p?[^\[]*]', '1080p BluRay x264', title, flags=re.I)
        title = re.sub(r'\[BLuRay[^\[]*]', '720p BluRay x264', title, flags=re.I)
        title = re.sub(r'\[BRrip[^\[]*]', '720p BluRay x264', title, flags=re.I)
        title = re.sub(r'\[BDrip[^\[]*]', '720p BluRay x264', title, flags=re.I)

        # Detect hdtv/bluray from the URL when the title does not carry it.
        # hdtv 1080p example url: http://www.newpct.com/descargar-seriehd/foo/capitulo-610/hdtv-1080p-ac3-5-1/
        # hdtv 720p example url: http://www.newpct.com/descargar-seriehd/foo/capitulo-26/hdtv-720p-ac3-5-1/
        # hdtv example url: http://www.newpct.com/descargar-serie/foo/capitulo-214/hdtv/
        # bluray compilation example url: http://www.newpct.com/descargar-seriehd/foo/capitulo-11/bluray-1080p/
        title_hdtv = re.search(r'HDTV', title, flags=re.I)
        title_720p = re.search(r'720p', title, flags=re.I)
        title_1080p = re.search(r'1080p', title, flags=re.I)
        title_x264 = re.search(r'x264', title, flags=re.I)
        title_bluray = re.search(r'bluray', title, flags=re.I)
        title_vo = re.search(r'\[V.O.[^\[]*]', title, flags=re.I)
        url_hdtv = re.search(r'HDTV', url, flags=re.I)
        url_720p = re.search(r'720p', url, flags=re.I)
        url_1080p = re.search(r'1080p', url, flags=re.I)
        url_bluray = re.search(r'bluray', url, flags=re.I)
        url_serie_hd = re.search(r'descargar-seriehd', url, flags=re.I)
        url_serie_vo = re.search(r'descargar-serievo', url, flags=re.I)

        if not title_hdtv and url_hdtv:
            title += ' HDTV'
            if not title_x264:
                title += ' x264'
        if not title_bluray and url_bluray:
            title += ' BluRay'
            if not title_x264:
                title += ' x264'
        if not title_1080p and url_1080p:
            title += ' 1080p'
            title_1080p = True
        if not title_720p and url_720p:
            title += ' 720p'
            title_720p = True
        if not (title_720p or title_1080p) and url_serie_hd:
            title += ' 720p'
        if not title_vo and url_serie_vo:
            title += ' [V.O.]'
            title_vo = True

        # Language
        # title = re.sub(r'\[Spanish[^\[]*]', 'SPANISH AUDIO', title, flags=re.I)
        # title = re.sub(r'\[Castellano[^\[]*]', 'SPANISH AUDIO', title, flags=re.I)
        # title = re.sub(r'\[Español[^\[]*]', 'SPANISH AUDIO', title, flags=re.I)
        # title = re.sub(r'\[Español Castellano[^\[]*]', 'SPANISH AUDIO', title, flags=re.I)
        # title = re.sub(r'\[AC3 5\.1[^\[]*]', 'SPANISH AUDIO', title, flags=re.I)
        # title = re.sub(r'\[AC3 5\.1 Español[^\[]*]', 'SPANISH AUDIO', title, flags=re.I)
        # title = re.sub(r'\[AC3 5\.1 Español Castellano[^\[]*]', 'SPANISH AUDIO', title, flags=re.I)

        if title_vo:
            title += ' -NEWPCTVO'
        else:
            title += ' -SPANISH AUDIO'
            title += ' -NEWPCT'

        # Propers handling
        title = re.sub(r'\(?proper\)?', '-PROPER', title, flags=re.I)
        title = re.sub(r'\(?repack\)?', '-REPACK', title, flags=re.I)

        return self._clean_spaces(title)
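
    # Worked example of the rules above (assumed output, traced through the
    # substitutions step by step): a list-format title such as
    #   'Serie Foo Temporada 2 Capitulo 4 - Español Castellano Calidad [ HDTV ]'
    # is first rewritten to 'Foo - Temporada 2 [HDTV] [Cap.204]', then the
    # quality rules map '[HDTV]' to 'HDTV x264', giving roughly
    #   'Foo - Temporada 2 HDTV x264 [Cap.204] -SPANISH AUDIO -NEWPCT'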

    def _process_season_episode(self, season_episode):

        # Four digits are assumed to be SSEE ('1105' -> season 11, episode 05);
        # shorter values are S(E)E ('610' -> season 6, episode 10). The original
        # (\d{2})(\d{2}) fallback could never fire after (\d)(\d{1,2}) had been
        # tried, so the four-digit form is checked first here.
        match = re.search(r'^(\d{2})(\d{2})$', season_episode) if len(season_episode) == 4 else None
        if not match:
            match = re.search(r'(\d)(\d{1,2})', season_episode)

        season = match.group(1)
        episode = match.group(2).zfill(2)

        return season, episode
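
    # Examples of the assumed mapping above:
    #   _process_season_episode('610')  -> ('6', '10')
    #   _process_season_episode('1105') -> ('11', '05')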

    def _clean_spaces(self, value):
        """Collapse runs of spaces and trim spaces inside brackets and parentheses."""
        value = value.strip()
        value = re.sub(r' +', ' ', value)
        value = re.sub(r'\[ +', '[', value)
        value = re.sub(r' +\]', ']', value)
        value = re.sub(r'\( +', '(', value)
        value = re.sub(r' +\)', ')', value)

        return value
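
    # Example: _clean_spaces('Foo  - Temporada 2 [ HDTV ]') -> 'Foo - Temporada 2 [HDTV]'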


provider = newpctProvider()