Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # -*- coding: utf-8 -*-
- '''
- Author : Huseyin BIYIK <husenbiyik at hotmail>
- Year : 2016
- License : GPL
- This program is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program. If not, see <http://www.gnu.org/licenses/>.
- '''
- import sublib
- import htmlement
- import urlparse
- import re
- import os
- domain = "http://www.turkcealtyazi.org"
- quals = {
- "1": 5, # good quality
- "2": 4, # enough quality
- "3": 0, # bad quality
- "4": 2, # not rated yet
- "5": 1, # waiting for source
- "6": 3, # archived
- }
- ripTypes = {
- "rps r1": "HD",
- "rps r2": "DVDRip",
- "rps r3": "?r3",
- "rps r4": "?r4",
- "rps r5": "?r5",
- "rps r6": "WEBRIP",
- "rps r7": "BDRip",
- "rps r8": "WEB-DL",
- "" : "N/A"
- }
- tranTypes = {
- "cps c1": "DVDRip",
- "cps c2": "?c2",
- "cps c3": "TVRip"
- }
- def norm(txt):
- txt = txt.replace(" ", "")
- txt = txt.lower()
- return txt
- def striphtml(txt):
- txt = re.sub("<.*?>", "", txt)
- txt = re.sub(r'\\t', "", txt)
- txt = re.sub("\n", "", txt)
- txt = txt.strip()
- #txt = txt.replace(" ", " ")
- return txt
- def elementsrc(element, exclude=[]):
- if element is None:
- return ""
- if element in exclude:
- return ""
- text = element.text or ''
- for subelement in element:
- text += elementsrc(subelement, exclude)
- text += element.tail or ''
- return striphtml(text)
- class turkcealtyazi(sublib.service):
- def search(self):
- self.found = False
- if self.item.imdb:
- self.find(self.item.imdb)
- if not self.num() and not self.item.show and self.item.year:
- self.find("%s %s" % (self.item.title, self.item.year))
- self._subs = []
- if not self.num():
- self.find(self.item.title)
- def checkpriority(self, txt):
- # this is a very complicated and fuzzy string work
- txt = txt.lower().replace(" ", "")
- cd = re.search("([0-9])cd", txt)
- # less the number of cds higher the priority
- if cd:
- return False, - int(cd.group(1))
- # rest is for episodes, if movie then return lowest prio.
- if self.item.episode < 0 or not self.item.show:
- return False, -100
- ispack = 0
- packmatch = 0
- epmatch = 0
- skip = False
- se = re.search("s(.+?)\|e(.+)", txt)
- if not se:
- se = re.search("s(.+?)(paket)", txt)
- if se:
- e = se.group(2)
- s = se.group(1)
- # verify season match first
- if s.isdigit() and self.item.season > 0 and \
- not self.item.season == int(s):
- return True, 0
- ismultiple = False
- # e: 1,2,3,4 ...
- for m in e.split(","):
- if m.strip().isdigit():
- ismultiple = True
- else:
- ismultiple = False
- break
- if ismultiple:
- # check if in range
- multiples = [int(x) for x in e.split(",")]
- if self.item.episode in multiples:
- packmatch = 2
- else:
- skip = True
- # e: 1~4
- if "~" in e:
- startend = e.split("~")
- # check if in range
- if len(startend) == 2 and \
- startend[0].strip().isdigit() and \
- startend[1].strip().isdigit():
- if int(startend[0]) < self.item.episode and \
- int(startend[1]) > self.item.episode:
- packmatch = 2
- else:
- skip = True
- else:
- ispack = 1
- # e: Paket meaning a package
- if e == "paket":
- ispack = 1
- # e:1 or e:01
- if e.isdigit():
- if int(e) == self.item.episode:
- epmatch = 3
- else:
- skip = True
- return skip, ispack + epmatch + packmatch
- def scraperesults(self, page, tree, query=None):
- for row in tree.findall(".//div[@class='nblock']/div/div[2]"):
- a = row.find(".//a")
- if a is None:
- continue
- link = a.get("href")
- name = a.get("title")
- years = row.findall(".//span")
- if len(years) > 1:
- ryear = re.search("([0-9]{4})", years[1].text)
- if ryear:
- year = int(ryear.group(1))
- if len(years) <= 1 or not ryear:
- year = "-1"
- if norm(name) == norm(self.item.title) and \
- (self.item.show or
- (self.item.year is None or self.item.year == year)):
- self.found = True
- p = self.request(domain + link)
- e = htmlement.fromstring(p)
- self.scrapepage(p, e)
- break
- if query and not self.found:
- pages = tree.findall(".//div[@class='pagin']/a")
- for page in pages:
- if "sonra" in page.text.lower():
- if self.found:
- break
- query = dict(urlparse.parse_qsl(urlparse.urlparse(page.get("href")).query))
- self.scraperesults(self.request(domain + "/find.php", query))
- def scrapepage(self, page, tree):
- subs = tree.findall(".//div[@id='altyazilar']/div/div")
- for s in subs:
- desc = s.find(".//div[@class='ripdiv']")
- xname = s.find(".//div[@class='fl']/a")
- alcd = s.find(".//div[@class='alcd']")
- if xname is None:
- continue
- if alcd is None:
- continue
- if desc is None:
- continue
- alcd = elementsrc(alcd)
- name = xname.find(".//strong").text
- link = xname.get("href")
- ripType = desc.find(".//span")
- if ripType is not None:
- ripType = ripType.get("class")
- ripType = ripTypes[ripType]
- else:
- ripType = ""
- desc = ripType + ' ' + elementsrc(desc)
- skip, priority = self.checkpriority(alcd)
- if skip:
- continue
- alcevirmen = s.find(".//div[@class='alcevirmen']")
- tran = ""
- alcevirmenA = alcevirmen.findall(".//a")
- alcevirmenSpan = alcevirmen.findall(".//span")
- tranCount = len(alcevirmenSpan) + 1
- if alcevirmenA:
- for alc in alcevirmenA:
- strong = alc.find(".//strong")
- span = strong.find(".//span")
- if strong is not None:
- if strong.text is not None:
- tran += strong.text
- elif span is not None:
- tran += tranTypes[span.get("class")]
- tranCount -= 1
- tranCount -= 1
- if tranCount > 0:
- tran += ' & '
- elif alcevirmenSpan:
- tran = tranTypes[alcevirmenSpan[0].get("class")]
- else:
- tran = alcevirmen.text
- iso = "tr"
- qualrate = "4"
- aldil = s.find(".//div[@class='aldil']/span")
- if aldil is not None:
- cls = aldil.get("class")
- riso = re.search('flag([a-z]{2})', cls)
- if riso is not None:
- iso = riso.group(1)
- qual = s.find(".//div[@class='fl']/span")
- if qual is not None:
- qual = qual.get("class")
- if isinstance(qual, (str, unicode)):
- qual = qual.replace("kal", "")
- if qual.isdigit():
- qualrate = qual
- namestr = "%s: %s, %s ~ %s" % (name, alcd, desc, tran)
- sub = self.sub(namestr, iso)
- sub.download(domain + link)
- sub.priority = priority
- if qual:
- sub.rating = quals[qualrate]
- self.addsub(sub)
- def find(self, query):
- q = {"cat": "sub", "find": query}
- page = self.request(domain + "/find.php", q)
- tree = htmlement.fromstring(page)
- title = tree.find(".//title")
- if title is not None:
- if "arama" in title.text.lower():
- self.scraperesults(page, tree, q)
- else:
- self.scrapepage(page, tree)
- else:
- self.scrapepage(page, tree)
- def download(self, link):
- page = self.request(link)
- tree = htmlement.fromstring(page)
- idid = tree.find(".//input[@name='idid']").get("value")
- alid = tree.find(".//input[@name='altid']").get("value")
- sdid = tree.find(".//input[@name='sidid']").get("value")
- data = {
- "idid": idid,
- "altid": alid,
- "sidid": sdid
- }
- remfile = self.request(domain + "/ind", None,
- data,
- domain,
- True,
- )
- fname = remfile.info().getheader("Content-Disposition")
- fname = re.search('filename=(.*)', fname)
- fname = fname.group(1)
- fname = os.path.join(self.path, fname)
- with open(fname, "wb") as f:
- f.write(remfile.read())
- self.addfile(fname)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement