Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# -*- coding: utf-8 -*-
'''
    Author    : Huseyin BIYIK <husenbiyik at hotmail>
    Year      : 2016
    License   : GPL

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.
    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
    GNU General Public License for more details.
    You should have received a copy of the GNU General Public License
    along with this program. If not, see <http://www.gnu.org/licenses/>.
'''
import os
import re

import sublib
# Base URL that every relative planetdp link and endpoint is appended to.
domain = "https://www.planetdp.org"

# Two-letter code taken from the site's flag image names -> code passed on to
# sublib as the subtitle language.
# NOTE(review): "gr" maps to "de"; presumably the site labels German as "gr"
# (isotoquery below names "de" as "Almanca", i.e. German) -- confirm.
dptoiso = {
    "tr": "tr",
    "en": "en",
    "sp": "es",
    "gr": "de",
    "fr": "fr",
}

# Language code -> Turkish language name (Turkish, English, Spanish, German,
# French). Not referenced elsewhere in this file; kept for external users.
isotoquery = {
    "tr": u"Türkçe",
    "en": u"İngilizce",
    "es": u"İspanyolca",
    "de": u"Almanca",
    "fr": u"Fransızca",
}

# CSS class of the site's quality badge -> rating stored on the subtitle.
quals = {
    # approved
    "sub_checked": 5,
    # technical structure errors, minor spelling errors, extreme case
    "sub_checked_orange": 3,
    # technical structure errors, severe spelling errors, weak translation
    "sub_checked_red": 1,
}
def norm(txt):
    """Return ``txt`` lower-cased with every space character removed.

    Only the plain space (" ") is dropped; tabs and newlines are kept.
    Used to compare titles without regard to case or spacing.
    """
    return txt.replace(" ", "").lower()
class planetdp(sublib.service):
    """Subtitle search/download service that scrapes planetdp.org pages.

    The class leans on members inherited from ``sublib.service``
    (``request``, ``sub``, ``addsub``, ``addfile``, ``num``, ``item``,
    ``path``). Their contracts live in ``sublib`` and are not visible here,
    so comments about them below are hedged.
    """

    def search(self):
        """Run the lookup strategies in order until one collects subtitles.

        Order: IMDb id (only when it starts with "tt"), then title + year,
        then a plain title query. A later strategy only runs while
        ``self.num()`` is falsy (presumably the count of subtitles added so
        far -- confirm against sublib).
        """
        if self.item.imdb and self.item.imdb.startswith("tt"):
            self.searchimdb()
        if not self.num() and self.item.year:
            self.searchnameyear()
        if not self.num():
            self.searchpredict()

    def download(self, link):
        """Fetch the subtitle archive behind ``link`` and register the file.

        The subtitle page carries a CSRF token and two identifiers that must
        be posted back to ``/subtitle/download``. Nothing happens when any of
        them is missing, or when the response names no file.
        """
        page = self.request(link)
        token = re.search('<input type="hidden" name="_token" value="(.*?)">', page)
        subid = re.search(r'rel\-id="(.*?)"', page)
        uniqk = re.search(r'rel\-tag="(.*?)"', page)
        if not (token and subid and uniqk):
            return
        data = {
            "_token": token.group(1),
            "subtitle_id": subid.group(1),
            "uniquekey": uniqk.group(1),
        }
        # Positional arguments are passed through to sublib's request();
        # presumably (url, query, post data, referer, return-raw-response).
        remfile = self.request(domain + "/subtitle/download", None, data, link, True)
        header = remfile.info().getheader("Content-Disposition")
        named = re.search('filename=(.*)', header or "")
        if not named:
            # No usable file name in the response: nothing to save.
            return
        # The name comes from the server, so drop surrounding quotes and any
        # directory part before joining it to the local download path.
        fname = os.path.basename(named.group(1).strip().strip("\"'"))
        if not fname:
            return
        fname = os.path.join(self.path, fname)
        with open(fname, "wb") as f:
            f.write(remfile.read())
        self.addfile(fname)

    def checkpriority(self, txt):
        """Score a listing's season/episode markup against the wanted episode.

        ``txt`` is the HTML fragment expected to hold ``S:<b>..</b>`` (season)
        and ``B:<b>..</b>`` (episode) markers, or the alternative
        ``<b>S</b>:..`` / ``<b>B</b>:..`` form.

        Returns a ``(skip, priority)`` tuple. ``skip`` is True when the
        listing is for another season or episode. ``priority`` is the sum of
        3 for an exact episode, 2 for a list or range that contains the
        episode, and 1 for a generic pack. Movies, and items with a negative
        episode number, always get ``(False, 0)``.
        """
        if self.item.episode < 0 or not self.item.show:
            return False, 0
        sb = re.search(r"S:<b>(.+?)<\/b>[-|,]B:<b>(.+?)<\/b>", txt)
        if not sb:
            sb = re.search(r".*?<b>S<\/b>\:(.+?)[-|,]<b>B<\/b>\:(.+)", txt)
        if not sb:
            return False, 0

        s = sb.group(1).strip().replace(" ", "").lower()
        e = sb.group(2).strip().replace(" ", "").lower()

        # A numeric season that differs from the wanted one rules the row out.
        if s.isdigit() and self.item.season > 0 and self.item.season != int(s):
            return True, 0

        skip = False
        ispack = 0
        packmatch = 0
        epmatch = 0

        # B: 1,2,3,4 ... (a single number also counts as a one-item list)
        parts = e.split(",")
        if all(part.strip().isdigit() for part in parts):
            if self.item.episode in [int(part) for part in parts]:
                packmatch = 2
            else:
                skip = True

        # B: 1~4
        if "~" in e:
            startend = e.split("~")
            if len(startend) == 2 and \
                    startend[0].strip().isdigit() and \
                    startend[1].strip().isdigit():
                # Both ends belong to the pack, so the comparison is inclusive.
                if int(startend[0]) <= self.item.episode <= int(startend[1]):
                    packmatch = 2
                else:
                    skip = True
            else:
                # Unparseable range: treat it as a generic pack.
                ispack = 1

        # B: Paket ("paket" is Turkish for package)
        if "paket" in e:
            ispack = 1

        # B:1 or B:01
        if e.isdigit():
            if int(e) == self.item.episode:
                epmatch = 3
            else:
                skip = True

        return skip, ispack + epmatch + packmatch

    def _flagtoiso(self, flag):
        """Turn a flag-image regex match into a language code.

        No match means no flag image, which the listing uses for Turkish.
        The code is the last two characters of the image's base file name;
        an unknown code yields None so the caller drops the row.
        """
        if not flag:
            return "tr"
        country = flag.group(1).split("/")[-1].split(".")[0][-2:]
        return dptoiso.get(country)

    def _quality(self, column):
        """Return the rating for the quality badge in ``column`` (0 if none)."""
        qual = re.search(r'<i.*?_cls (.*?)".*<\/i', column)
        if not qual:
            return 0
        return quals.get(qual.group(1), 0)

    def scrapesubs(self, page):
        """Parse a ``/subtitlelist`` result table and add every usable row.

        Columns are read by position: 1 = link, title and season/episode
        markup, 2 = language flag, 3 = a linked label appended to the name
        after " ~ ", 9 = quality badge. Rows whose first cell does not match,
        that ``checkpriority`` rejects, or whose language is unknown are
        skipped.
        """
        for row in re.findall("<tr(.*?)</tr>", page, re.DOTALL):
            link = None
            name = None
            iso = None
            priority = 0
            quality = 0
            columns = re.findall("<td(.*?)</td>", row, re.DOTALL)
            for index, column in enumerate(columns, 1):
                if index == 1:
                    res = re.search('href="(.*?)".*?title="(.*?)">(.*?)<(.*)', column)
                    if not res:
                        # Not a subtitle row; leave name unset so it is dropped.
                        break
                    link = domain + res.group(1)
                    skip, priority = self.checkpriority(res.group(4))
                    if skip:
                        break
                    if self.item.show:
                        name = "%s" % (res.group(2))
                    else:
                        name = "%s: %s" % (res.group(3), res.group(2))
                elif index == 2:
                    iso = self._flagtoiso(re.search("<img src='(.*?)'", column))
                elif index == 3:
                    res = re.search("<a.*?>(.*?)</a", column)
                    if res:
                        name += " ~ %s" % res.group(1)
                elif index == 9:
                    quality = self._quality(column)
            if link and iso and name:
                sub = self.sub(name, iso)
                sub.download(link)
                sub.priority = priority
                sub.rating = quality
                self.addsub(sub)

    def scrapemovie(self, page):
        """Parse a title's own page, where each subtitle spans two table rows.

        The first row of a pair holds the link, name, flag, translator
        (column 7) and quality badge (column 9). The following row holds the
        release text in ``<strong>``, and the subtitle is added once that row
        has been read.

        NOTE(review): the source this was recovered from had lost its
        indentation. Adding the subtitle inside the release-row branch is the
        reading that matches the two-row layout -- confirm against the
        upstream file.
        """
        movie_found = False
        link = None
        name = None
        iso = None
        trans = None
        priority = 0
        quality = 0
        for row in re.findall('<tr(.*?)</tr>', page, re.DOTALL):
            if movie_found:
                # Second row of the pair: release info for the previous row.
                movie_found = False
                release = re.search('<strong>(.*?)</strong>', row)
                if release:
                    name += ": %s" % release.group(1)
                if link and name and iso:
                    if trans:
                        name += " ~ %s" % trans
                    sub = self.sub(name, iso)
                    sub.download(link)
                    sub.rating = quality
                    # The priority belongs on the subtitle, as in scrapesubs.
                    sub.priority = priority
                    self.addsub(sub)
                continue

            # First row of a pair: start from a clean slate.
            link = None
            name = None
            iso = None
            trans = None
            priority = 0
            quality = 0
            columns = re.findall("<td(.*?)</td>", row, re.DOTALL)
            for index, column in enumerate(columns, 1):
                if index == 1:
                    res = re.search(r'href="(.*?)".*?>(.*?)\n', column)
                    if res:
                        link = domain + res.group(1)
                        name = "%s" % res.group(2)
                        skip, priority = self.checkpriority(column)
                        if skip:
                            break
                        movie_found = True
                elif index == 2:
                    iso = self._flagtoiso(re.search("src='(.*?)'", column))
                elif index == 7:
                    res = re.search(r'itemprop="translator">\n(.*?)\n', column)
                    if res:
                        trans = res.group(1)
                elif index == 9:
                    quality = self._quality(column)

    def scraperesult(self, page):
        """Pick the matching title from a search-results page and scrape it.

        A result matches when its year equals ``self.item.year`` and its
        normalised name, or one of its "Aka" names, equals the normalised
        wanted title. Only the first match is followed. Callers are expected
        to have a year at this point.
        """
        # The line break after </span> may be CRLF, CR or LF.
        divs = re.findall(
            '<div class="col-md-9 col-sm-9 translate_list-right2">(.*?)</span>(?:\r\n|\r|\n)<a',
            page, re.DOTALL)
        nname = norm(self.item.title)
        if isinstance(nname, bytes):
            # Byte-string titles are decoded; text titles are used as they are.
            nname = nname.decode('utf-8')
        for div in divs:
            rlinkname = re.search(
                r'<a href="(.*?)"><h4>(.*?)<\/h4><\/a>(\r\n|\r|\n)<h5>.*?([0-9]{4}).*?<\/h5>',
                div)
            if not rlinkname:
                continue
            link = rlinkname.group(1)
            name = rlinkname.group(2)
            # Group 4 is exactly four digits, so int() cannot fail here.
            year = int(rlinkname.group(4))
            akas = []
            aka = re.search('Aka:</span>(.*?)<br>', div)
            if aka:
                akas = [norm(x.strip()) for x in aka.group(1).split(",")]
            if year == self.item.year and \
                    (nname == norm(name) or nname in akas):
                self.scrapemovie(self.request(domain + link))
                break

    def _searchlist(self, title):
        """Query ``/subtitlelist`` for ``title`` and scrape the result table.

        ``title`` may be a name or an IMDb id. The season is sent only when
        it is not negative.
        """
        if self.item.season < 0:
            season = ""
        else:
            season = self.item.season
        query = {
            "title": title,
            "translator": "",
            "name": "",
            "release_info": "",
            "fps": "",
            "season": season,
            "episode": "",
            "lang": "",
        }
        page = self.request(domain + "/subtitlelist", query)
        return self.scrapesubs(page)

    def searchimdb(self):
        """Search the subtitle list by the item's IMDb id."""
        return self._searchlist(self.item.imdb)

    def searchnameyear(self):
        """Search by title and year through ``/movie/search``.

        The response is handled as a result list when its ``<title>`` is the
        site's "Arama Sonuçları" (search results) heading, and as a title
        page otherwise. Callers are expected to have a year at this point.

        NOTE(review): the recovered source had lost its indentation; the
        final ``scrapemovie`` call is placed at function level, so it also
        runs for a page with no ``<title>`` -- confirm against upstream.
        """
        query = {
            "title": self.item.title,
            "year_date": self.item.year,
            "is_serial": int(self.item.show),
        }
        page = self.request(domain + "/movie/search", query)
        title = re.search("<title>(.*?)</title>", page)
        if title and title.group(1) == u' Arama Sonuçları ':
            return self.scraperesult(page)
        return self.scrapemovie(page)

    def searchpredict(self):
        """Search the subtitle list by the item's plain title."""
        return self._searchlist(self.item.title)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement