from bs4 import BeautifulSoup as bs
from urllib.request import urlopen
# Flask-Cache is unmaintained; Flask-Caching is the drop-in replacement
# with the same Cache(app, config=...) API.
from flask_caching import Cache
import time
import yaml
import os
import requests
import re
import pickle
import sys
from flask import Flask, render_template

app = Flask(__name__)
# 'simple' is an in-process memory cache; fine for a single-process dev server.
cache = Cache(app, config={'CACHE_TYPE': 'simple'})

@app.route('/')
def index():
    return render_template('home.html')


# Scrapes all three trackers and pickles the results to disk.
@app.route('/generate/')
def generate():
    arenabg.save_list()
    zamunda.save_list()
    zelka.save_list()
    return render_template('home.html')

# The route decorators must come first (outermost) so that Flask registers
# the cached wrapper rather than the bare view function.
@app.route('/poster_gallery/', defaults={'genre': 'All', 'year': 'All'})
@app.route('/poster_gallery/<string:genre>/<string:year>')
@cache.cached(timeout=50)
def poster_gallery(genre, year):
    movies = arenabg.get_cached_list() + zamunda.get_cached_list() + zelka.get_cached_list()
    filtered_movies, movies_count, serials_count, genres, years = prepare_torrents(movies, genre, year, 'movie')
    return render_template('poster_gallery.html',
                           movies=filtered_movies,
                           movies_count=movies_count,
                           genres=genres,
                           years=years,
                           genre=genre,
                           year=year
                           )

@app.route('/tv_serials/', defaults={'genre': 'All', 'year': 'All'})
@app.route('/tv_serials/<string:genre>/<string:year>')
@cache.cached(timeout=50)
def tv_serials(genre, year):
    serials = arenabg.get_cached_list() + zamunda.get_cached_list() + zelka.get_cached_list()
    filtered_serials, movies_count, serials_count, genres, years = prepare_torrents(serials, genre, year, 'serial')
    return render_template('tv_serials.html',
                           movies=filtered_serials,
                           serials_count=serials_count,
                           genres=genres,
                           years=years,
                           genre=genre,
                           year=year
                           )

def prepare_torrents(movies, genre, year, media_type):
    movies = separate(movies, media_type)
    movies = remove_dupl(movies)
    movies = sorted(movies, key=getKey, reverse=True)
    genres = get_genres(movies)
    # filter by genre
    filtered_movies = []
    if genre == 'All':
        filtered_movies = movies
    else:
        for movie in movies:
            if genre in str(movie['Genre']):
                filtered_movies.append(movie)
    movies = filtered_movies
    # filter by year
    filtered_movies = []
    if year == 'All':
        filtered_movies = movies
    else:
        for movie in movies:
            if year in str(movie['Year']):
                filtered_movies.append(movie)
    years = get_years(movies)
    # count movies and serials in the filtered results
    movies_count = 0
    serials_count = 0
    for movie in filtered_movies:
        if movie['Type'] == 'movie':
            movies_count += 1
        else:
            serials_count += 1
    return filtered_movies, movies_count, serials_count, genres, years

# returns movies or serials only
def separate(movies, media_type):
    separated = []
    for movie in movies:
        if movie['Type'] == media_type:
            separated.append(movie)
    return separated


# sort key for a list of dicts: order by IMDb rating
def getKey(item):
    return item['imdbRating']

# Removes torrents with duplicate IMDb IDs, keeping the first occurrence
def remove_dupl(movies):
    imdb_ids = []
    unique_movies = []
    for movie in movies:
        if movie['Imdb_id'] not in imdb_ids:
            imdb_ids.append(movie['Imdb_id'])
            unique_movies.append(movie)
    return unique_movies

# build the sorted list of distinct genres
def get_genres(movies):
    genres = set()
    for movie in movies:
        for genre in movie['Genre']:
            if len(genre) > 0:
                genres.add(genre.strip())
    return sorted(genres)

# build the sorted list of distinct years
def get_years(movies):
    years = set()
    for movie in movies:
        # an en dash marks a serial's year range, which the year filter cannot match
        if '–' not in str(movie['Year']):
            years.add(movie['Year'])
    return sorted(years, reverse=True)

# load configuration variables
with open('config.yaml') as config_file:
    CONFIG_DATA = yaml.safe_load(config_file)
FETCH_DELAY = CONFIG_DATA['fetch_delay']
TRACKER_CACHE_FOLDER = CONFIG_DATA['tracker_cache_folder']
IMDB_CACHE_FOLDER = CONFIG_DATA['imdb_cache_folder']
DEBUG = CONFIG_DATA['debug']
PAGES_TO_SCAN = CONFIG_DATA['pages_to_scan']
MISSING_IMDB_POSTER = "/static/images/poster.jpg"
# arenabg.com
ARENABG_FORM_DATA = CONFIG_DATA['arenabg_form_data']
ARENABG_LOGIN_URL = CONFIG_DATA['arenabg_login_url']
ARENABG_INTERNAL_URL = CONFIG_DATA['arenabg_internal_url']
# zamunda.net
ZAMUNDA_INTERNAL_URL = CONFIG_DATA['zamunda_internal_url']
ZAMUNDA_LOGIN_URL = CONFIG_DATA['zamunda_login_url']
ZAMUNDA_FORM_DATA = CONFIG_DATA['zamunda_form_data']
# zelka.org
ZELKA_INTERNAL_URL = CONFIG_DATA['zelka_internal_url']
ZELKA_LOGIN_URL = CONFIG_DATA['zelka_login_url']
ZELKA_FORM_DATA = CONFIG_DATA['zelka_form_data']
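
# Illustrative config.yaml layout matching the keys read above. The values
# and the login-form field names below are assumptions; use whatever your
# tracker accounts and the sites' actual login forms require.
#
#   fetch_delay: 1
#   tracker_cache_folder: cache/trackers
#   imdb_cache_folder: cache/imdb
#   debug: true
#   pages_to_scan: 3
#   arenabg_login_url: https://arenabg.com/...
#   arenabg_internal_url: https://arenabg.com/...
#   arenabg_form_data:
#     username: someuser        # hypothetical field name
#     password: somepass        # hypothetical field name
#   # the zamunda_* and zelka_* keys follow the same pattern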

class Arenabg():
    # per-class scrape statistics
    tracker_urls_served = 0
    imdb_urls_served = 0
    total_movies = 0

    def __init__(self, login_url, user_data):
        self.user_data = user_data
        self.login_url = login_url
        self.tracker_login_session = self.tracker_login()
        self.tracker_name = self.login_url.split('/')[2]

    # create and return the torrent site login session
    def tracker_login(self):
        tracker_login_session = requests.Session()
        tracker_login_session.post(self.login_url, data=self.user_data)
        return tracker_login_session

    # returns all torrent details urls
    def get_movies(self):
        subs_url = ARENABG_INTERNAL_URL + 'subtitles:1/page:'
        bg_audio_url = ARENABG_INTERNAL_URL + 'audio:1/page:'
        pageurls = []
        for i in range(1, PAGES_TO_SCAN + 1):
            pageurls.append(subs_url + str(i))
        for i in range(1, PAGES_TO_SCAN + 1):
            pageurls.append(bg_audio_url + str(i))
        urls = set()
        for url in pageurls:
            if DEBUG:
                print('Processing {} urls - current {}'.format(len(pageurls), url))
            self.tracker_urls_served += 1
            response = self.tracker_login_session.get(url)
            html = response.text
            soup = bs(html, 'html.parser')
            for link in soup.findAll('a', attrs={'class': 'torrent-link'}):
                urls.add('http://arenabg.com' + link['href'])
            time.sleep(FETCH_DELAY)
        self.total_movies = len(urls)
        return urls

    # return the IMDb id for an arenabg details url (cached on disk)
    def get_movie_imdb_id(self, url):
        self.tracker_urls_served += 1
        file_name = re.sub('[^A-Za-z0-9]+', '', url)
        try:
            # cache hit: a previous run already resolved this url
            return pickle.load(open(self.get_path() + '/arenabg/' + file_name, 'rb'))
        except Exception:
            response = self.tracker_login_session.get(url)
            html = response.text
            soup = bs(html, 'html.parser')
            for link in soup.findAll('a'):
                if 'www.imdb.com' in str(link):
                    imdb_id = link['href'].split('/')[4].strip()
                    pickle.dump(imdb_id,
                                open(self.get_path()
                                     + '/arenabg/{}'.format(file_name), 'wb'))
                    if imdb_id[:2] == 'tt':
                        if DEBUG:
                            print(imdb_id)
                        return imdb_id
                    else:
                        print("invalid imdbid", imdb_id)
                        return False
            # no IMDb link on the page: cache the negative result too
            pickle.dump(False, open(self.get_path() + '/arenabg/' + file_name, 'wb'))
            time.sleep(FETCH_DELAY)
            return False

    # returns the script location path
    @staticmethod
    def get_path():
        return os.path.dirname(os.path.realpath(sys.argv[0]))

    # extract movie data from the IMDb site (cached on disk per id)
    @staticmethod
    def get_media_imdb(imdbid):
        Arenabg.imdb_urls_served += 1
        try:
            result = pickle.load(open(Arenabg.get_path() + '/imdb/' + imdbid, 'rb'))
            new = False
            return result, new
        except Exception:
            new = True
            url = 'http://www.imdb.com/title/{}/'.format(imdbid)
            try:
                soup = bs(urlopen(url), 'html.parser')
                time.sleep(FETCH_DELAY)
            except Exception:
                return None, False
            # parse rating; store as float so sorting never mixes str and int
            ss = soup.findAll('span', attrs={'itemprop': 'ratingValue'})
            try:
                rating = float(str(ss).split(">")[1].split("<")[0])
            except Exception:
                rating = 0.0
            # parse poster
            try:
                poster = soup.findAll('div', attrs={'class': 'poster'})
                poster = str(poster).split('"')[9]
            except Exception:
                poster = MISSING_IMDB_POSTER
            # parse genre
            gen = soup.findAll('div', attrs={'class': 'subtext'})
            x = bs(str(gen), "html.parser")
            gen = x.findAll('span', attrs={'class': 'itemprop'})
            genre = re.sub('<[^<]+?>', '', str(gen))
            genre = genre.strip('[').strip(']').strip(' ').split(",")
            # parse title
            t = soup.findAll('title')
            title = t[0].get_text().split(' (')[0]
            if len(title) < 1:
                return None, False
            # parse year; a missing titleYear span means the id is a serial
            try:
                media_type = 'movie'
                title_year = soup.find('span', {'id': 'titleYear'}).find('a').get_text()
            except Exception:
                media_type = 'serial'
                try:
                    title_year = soup.find('a', {'title': 'See more release dates'})
                    title_year = title_year.get_text().strip().split('(')[1].split(')')[0]
                except Exception:
                    try:
                        title_year = str(soup.find('span', {'class': 'parentDate'})
                                         .get_text()
                                         .strip()
                                         )
                        title_year = title_year.replace('(', '').replace(')', '')
                    except Exception:
                        title_year = None
            result = {'Imdb_id': imdbid, 'Type': media_type,
                      'Year': title_year, 'Title': title,
                      'imdbRating': rating, 'Poster': poster,
                      'Genre': genre
                      }
            pickle.dump(result,
                        open('{}/imdb/{}'.format(Arenabg.get_path(), imdbid),
                             'wb'))
            return result, new

    # returns a list of movies with their IMDb data
    def get_list(self):
        movies_list = []
        imdb_ids_list = []
        for movie in self.get_movies():
            if DEBUG:
                print('total urls: {} - processed urls: {} - imdb urls: {}'
                      .format(self.total_movies,
                              self.tracker_urls_served,
                              self.imdb_urls_served))
            imdb_id = self.get_movie_imdb_id(movie)
            # skip urls without an IMDb id, and ids already collected
            if imdb_id and imdb_id not in imdb_ids_list:
                movie_data, is_movie_new = self.get_media_imdb(imdb_id)
                if DEBUG:
                    print(movie)
                if movie_data:
                    movie_data['url'] = movie
                    movie_data['New'] = is_movie_new
                    movies_list.append(movie_data)
                    imdb_ids_list.append(imdb_id)
        return movies_list

    # scrape, sort by rating and pickle the list to disk
    def save_list(self):
        movies = sorted(self.get_list(), key=getKey, reverse=True)
        pickle.dump(movies, open(self.get_path()
                                 + '/data/' + self.tracker_name, 'wb'))

    # load the pickled list; returns False when no cache exists yet
    def get_cached_list(self):
        try:
            result = pickle.load(open(Arenabg.get_path()
                                      + '/data/' + self.tracker_name, 'rb'))
            return result
        except Exception:
            return False

class Zamunda(Arenabg):
    # get tracker movie urls
    def get_movies(self):
        pageurls = []
        # build the list of urls to be scanned - bg subs
        for i in range(0, PAGES_TO_SCAN):
            pageurls.append(ZAMUNDA_INTERNAL_URL + '?field=name&bgsubs=1&page=' + str(i))
        # build the list of urls to be scanned - bg audio
        for i in range(0, PAGES_TO_SCAN):
            pageurls.append(ZAMUNDA_INTERNAL_URL + '?field=name&bgaudio=1&page=' + str(i))
        urls = set()
        # get the links from each url
        for url in pageurls:
            if DEBUG:
                print('Processing {} urls - current {}'.format(len(pageurls), url))
            response = self.tracker_login_session.get(url)
            html = response.text
            soup = bs(html, 'html5lib')
            for link in soup.findAll('a', href=True):
                if 'banan?id=' in link['href'] and 'javascript' not in link['href']:
                    if not link['href'].startswith('/'):
                        # keep only the id part of the details link
                        urls.add('http://zamunda.net/' + link['href'].split('&')[0])
            time.sleep(FETCH_DELAY)
        self.total_movies = len(urls)
        return urls

class Zelka(Arenabg):
    # get tracker movie urls
    def get_movies(self):
        pageurls = []
        for i in range(1, PAGES_TO_SCAN + 1):
            pageurls.append(ZELKA_INTERNAL_URL
                            + '?page='
                            + str(i)
                            + '&sort=12&type=desc'
                            )
        urls = set()
        for url in pageurls:
            if DEBUG:
                print('Processing {} urls - current {}'.format(len(pageurls), url))
            response = self.tracker_login_session.get(url)
            html = response.text
            soup = bs(html, 'html5lib')
            # get all torrent details urls on the current page
            for link in soup.findAll('a', href=True):
                if 'details.php?id=' in link['href'] and 'hit' not in link['href']:
                    if 'userdetails' not in link['href']:
                        urls.add('http://zelka.org/' + link['href'])
            time.sleep(FETCH_DELAY)
        self.total_movies = len(urls)
        return urls

if __name__ == '__main__':
    arenabg = Arenabg(ARENABG_LOGIN_URL, ARENABG_FORM_DATA)
    zamunda = Zamunda(ZAMUNDA_LOGIN_URL, ZAMUNDA_FORM_DATA)
    zelka = Zelka(ZELKA_LOGIN_URL, ZELKA_FORM_DATA)
    # placeholder secret; move this into config.yaml or an env var for real use
    app.secret_key = 'secret123'
    app.run(debug=True, host='0.0.0.0')
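
# Typical workflow, as implied by the routes above: run the script, hit
# /generate/ once to scrape the trackers and pickle the results under
# ./data/, then browse /poster_gallery/ and /tv_serials/, optionally with
# /<genre>/<year> filters, e.g. /poster_gallery/Comedy/2017 (example values).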