Advertisement
Guest User

Untitled

a guest
Feb 24th, 2013
1,304
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 16.80 KB | None | 0 0
  1. # -*- coding: UTF-8 -*-
  2.  
  3. import os, sys, re, xbmc, xbmcgui, string, time, urllib, urllib2
  4. from utilities import languageTranslate, log
  5.  
  6. main_url = "http://v2.subscene.com/"
  7. debug_pretext = ""
  8.  
  9. # Seasons as strings for searching
  10. seasons = ["Specials", "First", "Second", "Third", "Fourth", "Fifth", "Sixth", "Seventh", "Eighth", "Ninth", "Tenth"]
  11. seasons = seasons + ["Eleventh", "Twelfth", "Thirteenth", "Fourteenth", "Fifteenth", "Sixteenth", "Seventeenth", "Eighteenth", "Nineteenth", "Twentieth"]
  12. seasons = seasons + ["Twenty-first", "Twenty-second", "Twenty-third", "Twenty-fourth", "Twenty-fifth", "Twenty-sixth", "Twenty-seventh", "Twenty-eighth", "Twenty-ninth"]
  13.  
  14. #====================================================================================================================
  15. # Regular expression patterns
  16. #====================================================================================================================
  17.  
  18. # subtitle pattern example:
  19. """
  20.         <tr>
  21.             <td>
  22.                 <a class="a1" href="/arabic/Magnolia/subtitle-311056.aspx" title="Subtitle - Magnolia  - Arabic">
  23.                     <span class="r0" >
  24.  
  25.                         Arabic
  26.                     </span>
  27.                      <span id="r311056">Magnolia.1999.720p.BluRay.x264-LEVERAGE</span>
  28.                 </a>
  29.  
  30.  
  31.  
  32.             </td>
  33.             <td class="a3">1
  34.             </td>
  35.             <td><div id=imgEar title='Hearing Impaired'>&nbsp;</div>
  36.             </td>
  37.  
  38. """
# Regular expression for one subtitle row of a subtitles page: a <tr> whose
# first cell holds the <a class="a1"> link, followed by a <td class="a3"> cell
# and a last cell that is either empty or exactly 52 characters long.
# getallsubs() applies it with re.IGNORECASE | re.DOTALL.
# NOTE: this is a normal (non-raw) string literal, so "\n", "\r" and "\t" are
# real control characters inside the pattern, and the backslash ending each
# source line joins it to the next line without inserting a newline.
# NOTE(review): the "." in front of "aspx" is unescaped and therefore matches
# any character, not only a literal dot.
subtitle_pattern = "..<tr>.{5}<td>.{6}<a class=\"a1\" href=\"/([^\n\r]{10,200}?-\d{3,10}.aspx)\" title=\"[^\n\r]{10,200}\">\
[\r\n\t ]+?<span class=\"r(0|100)\" >[\r\n\t\ ]+([^\r\n\t]+?) [\r\n\t]+</span>[\r\n\t ]+?<span id=\"r\d+\">([^\r\n\t]{5,500})</span>\
[\r\n\t]+?</a>[\r\n\t ]+?</td>[\r\n\t ]+?<td class=\"a3\">1[\r\n\t\ ]+?</td>[\r\n\t\ ]+?<td>(|.{52})\r\n\t\t\t</td>"
# group(1) = downloadlink (site-relative, without the leading "/"),
# group(2) = qualitycode ("0" or "100", taken from the span class),
# group(3) = language, group(4) = filename,
# group(5) = hearing impaired (empty string when the last cell is empty)
  43.  
  44.  
  45. # movie/seasonfound pattern example:
  46. """
  47.             <a href="/inception/subtitles-80839.aspx" class=popular>
  48.                 Inception (2010)
  49.                 <dfn>(327)</dfn>
  50. """
# Regular expression for one entry of the search result list: a link to a
# ".../subtitles-<digits>.aspx" page followed by "<title> (<4-digit year>)"
# and a <dfn> element. find_movie() and find_tv_show_season() apply it with
# re.IGNORECASE | re.DOTALL.
# NOTE: non-raw string literal, so "\r", "\n" and "\t" are real control
# characters inside the pattern.
# NOTE(review): in "\r\n{0,3}" the quantifier binds to "\n" only, i.e. one CR
# followed by zero to three LFs; the "." before "aspx" is unescaped.
movie_season_pattern = "...<a href=\"([^\n\r\t]*?/subtitles-\d{1,10}.aspx)\".{1,14}>\r\n{0,3}.{4}([^\n\r\t\&#]*?) \((\d\d\d\d)\) \r\n{0,3}.{1,4}<dfn>\(.{1,5}</dfn>"
# group(1) = link, group(2) = movie_season_title,  group(3) = year
  53.  
  54.  
  55.  
  56.  
  57. # download link pattern example:
  58. """
  59.         <a href="/subtitle/download?mac=LxawhQiaMYm9O2AsoNMHXbXDYN2b4yBreI8TJIBfpdw7UIo1JP5566Sbb2ei_zUC0" rel="nofollow" onclick="DownloadSubtitle(this)" id="downloadButton" class="button Positive">
  60. """
  61. downloadlink_pattern = "...<a href=\"(.+?)\" rel=\"nofollow\" onclick=\"DownloadSubtitle"
  62. # group(1) = link
  63.  
  64. # <input type="hidden" name="__VIEWSTATE" id="__VIEWSTATE" value="/wEPDwUKLTk1MDk4NjQwM2Rk5ncGq+1a601mEFQDA9lqLwfzjaY=" />
  65. viewstate_pattern = "<input type=\"hidden\" name=\"__VIEWSTATE\" id=\"__VIEWSTATE\" value=\"([^\n\r\t]*?)\" />"
  66.  
  67. # <input type="hidden" name="__PREVIOUSPAGE" id="__PREVIOUSPAGE" value="V1Stm1vgLeLd6Kbt-zkC8w2" />
  68. previouspage_pattern = "<input type=\"hidden\" name=\"__PREVIOUSPAGE\" id=\"__PREVIOUSPAGE\" value=\"([^\n\r\t]*?)\" />"
  69.  
  70. # <input type="hidden" name="subtitleId" id="subtitleId" value="329405" />
  71. subtitleid_pattern = "<input type=\"hidden\" name=\"subtitleId\" id=\"subtitleId\" value=\"(\d+?)\" />"
  72.  
  73. # <input type="hidden" name="typeId" value="zip" />
  74. typeid_pattern = "<input type=\"hidden\" name=\"typeId\" value=\"([^\n\r\t]{3,15})\" />"
  75.  
  76. # <input type="hidden" name="filmId" value="78774" />
  77. filmid_pattern = "<input type=\"hidden\" name=\"filmId\" value=\"(\d+?)\" />"
  78.  
  79.  
  80. #====================================================================================================================
  81. # Functions
  82. #====================================================================================================================
  83.  
  84. def to_subscene_lang(language):
  85.     if language == "Chinese":            return "Chinese BG code"
  86.     elif language == "PortugueseBrazil": return "Brazillian Portuguese"
  87.     elif language == "SerbianLatin":     return "Serbian"
  88.     elif language == "Ukrainian":        return "Ukranian"
  89.     else:                                return language
  90.  
  91.  
  92.  
  93. def find_movie(content, title, year):
  94.     url_found = None
  95.     for matches in re.finditer(movie_season_pattern, content, re.IGNORECASE | re.DOTALL):
  96.         log( __name__ ,"%s Found movie on search page: %s (%s)" % (debug_pretext, matches.group(2), matches.group(3)))
  97.         if string.find(string.lower(matches.group(2)),string.lower(title)) > -1:
  98.             if matches.group(3) == year:
  99.                 log( __name__ ,"%s Matching movie found on search page: %s (%s)" % (debug_pretext, matches.group(2), matches.group(3)))
  100.                 url_found = matches.group(1)
  101.                 break
  102.     return url_found
  103.  
  104.  
  105. def find_tv_show_season(content, tvshow, season):
  106.     url_found = None
  107.     for matches in re.finditer(movie_season_pattern, content, re.IGNORECASE | re.DOTALL):
  108.         log( __name__ ,"%s Found tv show season on search page: %s" % (debug_pretext, matches.group(2).decode("utf-8")))
  109.         if string.find(string.lower(matches.group(2)),string.lower(tvshow) + " ") > -1:
  110.             if string.find(string.lower(matches.group(2)),string.lower(season)) > -1:
  111.                 log( __name__ ,"%s Matching tv show season found on search page: %s" % (debug_pretext, matches.group(2).decode("utf-8")))
  112.                 url_found = matches.group(1)
  113.                 break
  114.     return url_found
  115.  
  116.  
  117. def getallsubs(response_url, content, language, title, subtitles_list, search_string):
  118.     for matches in re.finditer(subtitle_pattern, content, re.IGNORECASE | re.DOTALL):
  119.         languagefound = matches.group(3)
  120.         if languagefound == to_subscene_lang(language):
  121.             link = main_url + matches.group(1)
  122.             languageshort = languageTranslate(language,0,2)
  123.             filename   = matches.group(4)
  124.             hearing_imp = len(matches.group(5)) > 0
  125.             if search_string != "":
  126.                 log( __name__ , "string.lower(filename) = >" + string.lower(filename) + "<" )
  127.                 log( __name__ , "string.lower(search_string) = >" + string.lower(search_string) + "<" )
  128.                 if string.find(string.lower(filename),string.lower(search_string)) > -1:
  129.                     log( __name__ ,"%s Subtitles found: %s, %s" % (debug_pretext, languagefound, filename))
  130.                     subtitles_list.append({'rating': '0', 'movie':  title, 'filename': filename, 'sync': False, 'link': link, 'language_flag': 'flags/' + languageshort + '.gif', 'language_name': language, 'hearing_imp': hearing_imp})
  131.             else:
  132.                 log( __name__ ,"%s Subtitles found: %s, %s" % (debug_pretext, languagefound, filename))
  133.                 subtitles_list.append({'rating': '0', 'movie':  title, 'filename': filename, 'sync': False, 'link': link, 'language_flag': 'flags/' + languageshort + '.gif', 'language_name': language, 'hearing_imp': hearing_imp})
  134.  
  135.  
  136. def geturl(url):
  137.     log( __name__ ,"%s Getting url:%s" % (debug_pretext, url))
  138.     try:
  139.         response   = urllib2.urlopen(url)
  140.         content    = response.read()
  141.         #Fix non-unicode charachters in movie titles
  142.         strip_unicode = re.compile("([^-_a-zA-Z0-9!@#%&=,/'\";:~`\$\^\*\(\)\+\[\]\.\{\}\|\?\<\>\\]+|[^\s]+)")
  143.         content    = strip_unicode.sub('', content)
  144.         return_url = response.geturl()
  145.     except:
  146.         log( __name__ ,"%s Failed to get url:%s" % (debug_pretext, url))
  147.         content    = None
  148.         return_url = None
  149.     return(content, return_url)
  150.  
  151.  
  152. def search_subtitles( file_original_path, title, tvshow, year, season, episode, set_temp, rar, lang1, lang2, lang3, stack ): #standard input
  153.     subtitles_list = []
  154.     msg = ""
  155.     if len(tvshow) == 0:
  156.         search_string = title
  157.     if len(tvshow) > 0:
  158.         search_string = tvshow + " - " + seasons[int(season)] + " Season"
  159.     log( __name__ ,"%s Search string = %s" % (debug_pretext, search_string))
  160.     url = main_url + "filmsearch.aspx?q=" + urllib.quote_plus(search_string)
  161.     content, response_url = geturl(url)
  162.     if content is not None:
  163.         if re.search("subtitles-\d{2,10}\.aspx", response_url, re.IGNORECASE):
  164.             log( __name__ ,"%s One movie found, getting subs ..." % debug_pretext)
  165.             getallsubs(response_url, content, lang1, title, subtitles_list,  "")
  166.             if (lang2 != lang1): getallsubs(response_url, content, lang2, title, subtitles_list, "")
  167.             if ((lang3 != lang2) and (lang3 != lang1)): getallsubs(response_url, content, lang3, title, subtitles_list, "")
  168.         else:
  169.             if len(tvshow) == 0:
  170.                 log( __name__ ,"%s Multiple movies found, searching for the right one ..." % debug_pretext)
  171.                 subspage_url = find_movie(content, title, year)
  172.                 if subspage_url is not None:
  173.                     log( __name__ ,"%s Movie found in list, getting subs ..." % debug_pretext)
  174.                     url = main_url + subspage_url
  175.                     content, response_url = geturl(url)
  176.                     if content is not None:
  177.                         getallsubs(response_url, content, lang1, title, subtitles_list, "")
  178.                         if (lang2 != lang1): getallsubs(response_url, content, lang2, title, subtitles_list, "")
  179.                         if ((lang3 != lang2) and (lang3 != lang1)): getallsubs(response_url, content, lang3, title, subtitles_list, "")
  180.                 else:
  181.                     log( __name__ ,"%s Movie not found in list: %s" % (debug_pretext, title))
  182.                     if string.find(string.lower(title),"&") > -1:
  183.                         title = string.replace(title, "&", "and")
  184.                         log( __name__ ,"%s Trying searching with replacing '&' to 'and': %s" % (debug_pretext, title))
  185.                         subspage_url = find_movie(content, title, year)
  186.                         if subspage_url is not None:
  187.                             log( __name__ ,"%s Movie found in list, getting subs ..." % debug_pretext)
  188.                             url = main_url + subspage_url
  189.                             content, response_url = geturl(url)
  190.                             if content is not None:
  191.                                 getallsubs(response_url, content, lang1, title, subtitles_list, "")
  192.                                 if (lang2 != lang1): getallsubs(response_url, content, lang2, title, subtitles_list, "")
  193.                                 if ((lang3 != lang2) and (lang3 != lang1)): getallsubs(response_url, content, lang3, title, subtitles_list, "")
  194.                         else:
  195.                             log( __name__ ,"%s Movie not found in list: %s" % (debug_pretext, title))
  196.             if len(tvshow) > 0:
  197.                 log( __name__ ,"%s Multiple tv show seasons found, searching for the right one ..." % debug_pretext)
  198.                 tv_show_seasonurl = find_tv_show_season(content, tvshow, seasons[int(season)])
  199.                 if tv_show_seasonurl is not None:
  200.                     log( __name__ ,"%s Tv show season found in list, getting subs ..." % debug_pretext)
  201.                     url = main_url + tv_show_seasonurl
  202.                     content, response_url = geturl(url)
  203.                     if content is not None:
  204.                         search_string = "s%#02de%#02d" % (int(season), int(episode))
  205.                         getallsubs(response_url, content, lang1, title, subtitles_list, search_string)
  206.                         if (lang2 != lang1): getallsubs(response_url, content, lang2, title, subtitles_list, search_string)
  207.                         if ((lang3 != lang2) and (lang3 != lang1)): getallsubs(response_url, content, lang3, title, subtitles_list, search_string)
  208.  
  209.  
  210.     return subtitles_list, "", msg #standard output
  211.  
  212.  
  213. def download_subtitles (subtitles_list, pos, zip_subs, tmp_sub_dir, sub_folder, session_id): #standard input
  214.     url = subtitles_list[pos][ "link" ]
  215.     language = subtitles_list[pos][ "language_name" ]
  216.     content, response_url = geturl(url)
  217.     match=  re.compile(downloadlink_pattern).findall(content)
  218.     if match:
  219.         downloadlink = "http://subscene.com"  + match[0]
  220.         log( __name__ ,"%s Downloadlink: %s " % (debug_pretext, downloadlink))
  221.         viewstate = 0
  222.         previouspage = 0
  223.         subtitleid = 0
  224.         typeid = "zip"
  225.         filmid = 0
  226.         postparams = urllib.urlencode( { '__EVENTTARGET': 's$lc$bcr$downloadLink', '__EVENTARGUMENT': '' , '__VIEWSTATE': viewstate, '__PREVIOUSPAGE': previouspage, 'subtitleId': subtitleid, 'typeId': typeid, 'filmId': filmid} )
  227.         class MyOpener(urllib.FancyURLopener):
  228.             version = 'User-Agent=Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.3) Gecko/20100401 Firefox/3.6.3 ( .NET CLR 3.5.30729)'
  229.         my_urlopener = MyOpener()
  230.         my_urlopener.addheader('Referer', url)
  231.         log( __name__ ,"%s Fetching subtitles using url '%s' with referer header '%s' and post parameters '%s'" % (debug_pretext, downloadlink, url, postparams))
  232.         response = my_urlopener.open(downloadlink, postparams)
  233.         local_tmp_file = os.path.join(tmp_sub_dir, "subscene.xxx")
  234.         try:
  235.             log( __name__ ,"%s Saving subtitles to '%s'" % (debug_pretext, local_tmp_file))
  236.             if not os.path.exists(tmp_sub_dir):
  237.                 os.makedirs(tmp_sub_dir)
  238.             local_file_handle = open(local_tmp_file, "w" + "b")
  239.             local_file_handle.write(response.read())
  240.             local_file_handle.close()
  241.             #Check archive type (rar/zip/else) through the file header (rar=Rar!, zip=PK)
  242.             myfile = open(local_tmp_file, "rb")
  243.             myfile.seek(0)
  244.             if (myfile.read(1) == 'R'):
  245.                 typeid = "rar"
  246.                 packed = True
  247.                 log( __name__ , "Discovered RAR Archive")
  248.             else:
  249.                 myfile.seek(0)
  250.                 if (myfile.read(1) == 'P'):
  251.                     typeid = "zip"
  252.                     packed = False
  253.                     log( __name__ , "Discovered ZIP Archive")
  254.                 else:
  255.                     typeid = "srt"
  256.                     packed = False
  257.                     subs_file = local_tmp_file
  258.                     log( __name__ , "Discovered a non-archive file")
  259.             myfile.close()
  260.             local_tmp_file = os.path.join(tmp_sub_dir, "subscene." + typeid)
  261.             os.rename(os.path.join(tmp_sub_dir, "subscene.xxx"), local_tmp_file)
  262.             log( __name__ , "Saving to %s", local_tmp_file)
  263.         except:
  264.             log( __name__ ,"%s I don't know why this exception happens!" % (debug_pretext))
  265.         if packed:
  266.             files = os.listdir(tmp_sub_dir)
  267.             init_filecount = len(files)
  268.             max_mtime = 0
  269.             filecount = init_filecount
  270.             # determine the newest file from tmp_sub_dir
  271.             for file in files:
  272.                 if (string.split(file,'.')[-1] in ['srt','sub','txt']):
  273.                     mtime = os.stat(os.path.join(tmp_sub_dir, file)).st_mtime
  274.                     if mtime > max_mtime:
  275.                         max_mtime =  mtime
  276.             init_max_mtime = max_mtime
  277.             time.sleep(2)  # wait 2 seconds so that the unpacked files are at least 1 second newer
  278.             xbmc.executebuiltin("XBMC.Extract(" + local_tmp_file + "," + tmp_sub_dir +")")
  279.             waittime  = 0
  280.             while (filecount == init_filecount) and (waittime < 20) and (init_max_mtime == max_mtime): # nothing yet extracted
  281.                 time.sleep(1)  # wait 1 second to let the builtin function 'XBMC.extract' unpack
  282.                 files = os.listdir(tmp_sub_dir)
  283.                 filecount = len(files)
  284.                 # determine if there is a newer file created in tmp_sub_dir (marks that the extraction had completed)
  285.                 for file in files:
  286.                     if (string.split(file,'.')[-1] in ['srt','sub','txt']):
  287.                         mtime = os.stat(os.path.join(tmp_sub_dir, file)).st_mtime
  288.                         if (mtime > max_mtime):
  289.                             max_mtime =  mtime
  290.                 waittime  = waittime + 1
  291.             if waittime == 20:
  292.                 log( __name__ ,"%s Failed to unpack subtitles in '%s'" % (debug_pretext, tmp_sub_dir))
  293.             else:
  294.                 log( __name__ ,"%s Unpacked files in '%s'" % (debug_pretext, tmp_sub_dir))
  295.                 for file in files:
  296.                     # there could be more subtitle files in tmp_sub_dir, so make sure we get the newly created subtitle file
  297.                     if (string.split(file, '.')[-1] in ['srt', 'sub', 'txt']) and (os.stat(os.path.join(tmp_sub_dir, file)).st_mtime > init_max_mtime): # unpacked file is a newly created subtitle file
  298.                         log( __name__ ,"%s Unpacked subtitles file '%s'" % (debug_pretext, file))
  299.                         subs_file = os.path.join(tmp_sub_dir, file)
  300.         log( __name__ ,"%s Subtitles saved to '%s'" % (debug_pretext, local_tmp_file))
  301.         return False, language, subs_file #standard output
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement