Advertisement
JS5623

PrimeWire Scraper

Apr 30th, 2013
106
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.79 KB | None | 0 0
  1. import os
  2. import sys
  3. import re
  4. import urllib2
  5. import webbrowser
  6.  
  7. from bs4 import BeautifulSoup
  8.  
  def show_search(surl):
      """Prompt for a show title and return the URL of its season page.

      Reads a title from stdin, fetches the site's front page to obtain the
      value of the search form's ``key`` input, runs a search with it and
      builds the season URL from the first result that carries a link.

      Args:
          surl: Season path suffix appended to the show URL,
              e.g. ``'/season-1'``.

      Returns:
          The season page URL as a string.

      Raises:
          LookupError: If the search form key cannot be found, or the
              search produces no result with a link.
          urllib2.URLError: If a page cannot be fetched (``HTTPError`` is a
              subclass).
      """
      # Local import: only this function needs it.
      from urllib import quote_plus

      host = 'http://www.primewire.ag'

      # quote_plus turns spaces into '+' and also escapes characters such as
      # '&' or '#' that would otherwise corrupt the query string.
      title = quote_plus(raw_input("enter show: "))
      print('[*] Working...\n')

      front_page = BeautifulSoup(urllib2.urlopen(host))

      # The search endpoint expects the key value embedded in the search form.
      key_value = None
      for field in front_page.find_all('fieldset', {'class': 'search_container'}):
          key = field.find('input', {'name': 'key'})
          if key is not None and key.get('value'):
              key_value = key['value']

      if key_value is None:
          raise LookupError('search form key not found on ' + host)

      query = '/index.php?search_keywords={}&key={}&search_section=2'.format(
          title, key_value)
      results = BeautifulSoup(urllib2.urlopen(host + query))

      for div in results.find_all('div', {'class': 'index_item index_item_ie'}):
          anchor = div.find('a')
          href = anchor.get('href') if anchor is not None else None
          if not href:
              continue

          # Only the first 'watch' is swapped for 'tv', so a show whose slug
          # itself contains "watch" keeps its name intact.
          link = host + href.replace('watch', 'tv', 1) + surl
          print(link)
          return link

      raise LookupError('no search results found')
  32.  
  def episode_list(search):
      """Print the episodes listed on a season page and index their links.

      Every anchor inside a ``div.tv_episode_item`` whose text contains a
      number is recorded under that number (the first run of digits in the
      anchor text) and printed together with its episode name, or as
      "Untitled" when the anchor has no ``span.tv_episode_name``.

      Args:
          search: URL of the season page to fetch.

      Returns:
          A dict mapping episode number strings to absolute episode URLs.
          A later anchor with the same number replaces the earlier entry.
      """
      page = BeautifulSoup(urllib2.urlopen(search))
      episode_links = {}

      containers = page.find_all('div', {'class': 'tv_episode_item'})
      anchors = (a for item in containers for a in item.find_all('a'))

      for anchor in anchors:
          found = re.search(r'(\d+)', anchor.get_text())
          name_span = anchor.find('span', {'class': 'tv_episode_name'})

          # Anchors without a number in their text are not episodes.
          if not found:
              continue

          number = found.group(1)
          episode_links[number] = 'http://www.primewire.ag' + anchor['href']

          if name_span:
              label = '({}){}'.format(number, name_span.get_text().encode('utf-8'))
          else:
              label = '({}) - Untitled'.format(number)
          print(label)

      return episode_links
  54.  
  def link_search(episodes, episode):
      """Print the available links for one episode and index them.

      Fetches the chosen episode page and, for every ``table`` with
      ``width="100%"``, pairs each ``target="_blank"`` anchor with every
      rating item and every host span found in the same table. Each
      combination is printed as ``(number) - rating - host``, followed by
      ``[!]`` when the table contains a "Verified Link" image.

      Args:
          episodes: Dict mapping episode number strings to episode page URLs.
          episode: Key into ``episodes`` selecting the page to fetch.

      Returns:
          A dict mapping link number strings (the first run of digits in the
          anchor text) to absolute link URLs.

      Raises:
          KeyError: If ``episode`` is not a key of ``episodes``.
          urllib2.URLError: If the episode page cannot be fetched.
      """
      print('[*] Searching...\n')

      page = BeautifulSoup(urllib2.urlopen(episodes[episode]))
      links = {}

      for table in page.find_all('table', {'width': '100%'}):
          # These lookups depend only on the table, so run them once per table
          # instead of once per anchor/rating/host combination.
          ratings = table.find_all('li', {'class': 'current-rating'})
          hosts = table.find_all('span', {'class': 'version_host'})
          if not (ratings and hosts):
              # Nothing would be printed for this table, so nothing is
              # registered either.
              continue

          verified = table.find('img', {'title': 'Verified Link'})
          template = '({}) - {} - {} [!]' if verified else '({}) - {} - {}'

          for anchor in table.find_all('a', {'target': '_blank'}):
              found = re.search(r'(\d+)', anchor.get_text())
              if not found:
                  continue

              number = found.group(1)
              links[number] = 'http://www.primewire.ag' + anchor['href']

              for rating in ratings:
                  for host in hosts:
                      # Encode explicitly: formatting non-ASCII unicode into a
                      # byte string raises UnicodeEncodeError on Python 2.
                      print(template.format(
                          number,
                          rating.get_text().encode('utf-8'),
                          host.get_text().encode('utf-8')))

      return links
  78.  
  if __name__ == '__main__':
      # Usage: Primewire.py <season number>
      if len(sys.argv) != 2 or not sys.argv[1].isdigit():
          print('Primewire.py season#')
          sys.exit(1)

      # Any numeric season is accepted; int() normalises input such as '01'.
      season = '/season-{}'.format(int(sys.argv[1]))

      try:
          search = show_search(season)
          episodes = episode_list(search)

          # Without this check the prompt below could never be satisfied.
          if not episodes:
              print('- No episodes found!')
              sys.exit(1)

          while True:
              episode = raw_input('\n>>> #: ')
              if episode in episodes:
                  # 'cls' only exists on Windows; other systems use 'clear'.
                  os.system('cls' if os.name == 'nt' else 'clear')
                  break

          versions = link_search(episodes, episode)

          if not versions:
              print('- No links found!')
              sys.exit(1)

          while True:
              vid_link = raw_input('\n>>> #: ')
              if vid_link in versions:
                  break

          print('[*] Opening link in browser!')
          webbrowser.open(versions[vid_link])
          sys.exit()

      # HTTPError is a subclass of URLError, so it must be handled first.
      except urllib2.HTTPError:
          print('- HTTP Error!')
          sys.exit(1)
      except urllib2.URLError:
          print('- Connection Failure!')
          sys.exit(1)
      except (KeyboardInterrupt, EOFError):
          # The user aborted at a prompt (Ctrl-C / end of input).
          sys.exit(1)
      except Exception as exc:
          # Report unexpected failures instead of exiting silently.
          # SystemExit is not an Exception subclass, so the sys.exit() calls
          # above pass through untouched.
          print('- Error: {}'.format(exc))
          sys.exit(1)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement