Guest User

scraper.py

a guest
Apr 29th, 2020
46
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.81 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. # KodiAddon (CBC News)
  3. #
  4. from t1mlib import t1mAddon
  5. import datetime
  6. import json
  7. import re
  8. import urllib
  9. import urllib2
  10. import xbmcplugin
  11. import xbmcgui
  12. import HTMLParser
  13. import sys
  14. import xbmc
  15.  
  16. h = HTMLParser.HTMLParser()
  17. UTF8 = 'utf-8'
  18.  
  19. class myAddon(t1mAddon):
  20.  
  21. def getAddonMenu(self,url,ilist):
  22. html = self.getRequest('http://www.cbc.ca/player')
  23. shows = re.compile('<h2 class="section-title"[^>]*><a[^>]* href="(.+?)">(.+?)</a>', re.DOTALL).findall(html)
  24. shows.append(('/player/news/TV%20Shows/MarketPlace', 'Marketplace'))
  25. shows.append(('/player/news/TV%20Shows/Power%20&%20Politics', 'Power & Politics'))
  26. shows.append(('/player/news/TV%20Shows/The%20Fifth%20Estate', 'The Fifth Estate'))
  27. shows.append(('/player/news/TV%20Shows/The%20National/Latest%20Broadcast', 'The National'))
  28. shows.append(('/player/news/TV%20Shows/The%20Weekly', 'The Weekly'))
  29. for url, name in shows:
  30. infoList = {}
  31. infoList['mediatype'] = 'tvshow'
  32. infoList['Title'] = name
  33. infoList['TVShowTitle'] = name
  34. ilist = self.addMenuItem(name, 'GS', ilist, url, self.addonIcon, self.addonFanart, infoList, isFolder=True)
  35. return(ilist)
  36.  
  37. def getAddonCats(self,url,ilist):
  38. html = self.getRequest('http://www.cbc.ca')
  39. html = re.compile('<!-- -->My Local Settings(.+?)href="/news">Top Stories', re.DOTALL).search(html).group(1)
  40. shows = re.compile('<li class="regionsListItem"><.+?data-path="(.+?)".+?value="(.+?)".+?</li>', re.DOTALL).findall(html)
  41. for url, name in shows:
  42. infoList = {}
  43. infoList['mediatype'] = 'tvshow'
  44. infoList['Title'] = name
  45. infoList['TVShowTitle'] = name
  46. lurl = "/player/"+url
  47. # Manual fixes for multi-word locations
  48. if lurl == '/player/news/canada/british-columbia':
  49. lurl = '/player/news/canada/bc'
  50. if lurl == '/player/news/canada/thunder-bay':
  51. lurl = '/player/news/canada/thunder%20bay'
  52. if lurl == '/player/news/canada/new-brunswick':
  53. lurl = '/player/news/canada/nb'
  54. if lurl == '/player/news/canada/prince-edward-island':
  55. lurl = '/player/news/canada/pei'
  56. if lurl == '/player/news/canada/nova-scotia':
  57. lurl = '/player/news/canada/ns'
  58. if lurl == '/player/news/canada/newfoundland-labrador':
  59. lurl = '/player/news/canada/nl'
  60. ilist = self.addMenuItem(name, 'GE', ilist, lurl, self.addonIcon, self.addonFanart, infoList, isFolder=True)
  61. if lurl == '/player/news/canada/toronto':
  62. ilist = self.addMenuItem('Ottawa', 'GE', ilist, '/player/news/canada/ottawa', self.addonIcon, self.addonFanart, infoList, isFolder=True)
  63. return(ilist)
  64.  
  65. def getAddonShows(self,url,ilist):
  66. html = self.getRequest('http://www.cbc.ca%s' % url)
  67. shows = re.compile('<h2 class="section-title"[^>]*><a[^>]* href="(.+?)">(.+?)</a>', re.DOTALL).findall(html)
  68. count = 0
  69. for lurl, name in shows:
  70. count+=1
  71. if (count <= 0) or (url.find('TV%20Shows') > 0):
  72. self.getAddonEpisodes(url, ilist)
  73. else:
  74. for lurl, name in shows:
  75. infoList = {}
  76. infoList['mediatype'] = 'tvshow'
  77. infoList['Title'] = name
  78. infoList['TVShowTitle'] = name
  79. if lurl == '/player/news/canada':
  80. ilist = self.addMenuItem(name, 'GC', ilist, lurl, self.addonIcon, self.addonFanart, infoList, isFolder=True)
  81. else:
  82. ilist = self.addMenuItem(name, 'GS', ilist, lurl, self.addonIcon, self.addonFanart, infoList, isFolder=True)
  83. return(ilist)
  84.  
  85. def getAddonEpisodes(self,url,ilist):
  86. self.defaultVidStream['width'] = 1280
  87. self.defaultVidStream['height'] = 720
  88. cat = re.compile('/([^/]+?)$', re.DOTALL).search(url).group(1).replace('%20', ' ')
  89. html = self.getRequest('http://www.cbc.ca%s' % url)
  90. html = re.compile('window.__INITIAL_STATE__ = (.+?);</script>', re.DOTALL).search(html).group(1)
  91. a = json.loads(html)
  92. # Locate exact category name
  93. for b in a['video']['clipsByCategory']:
  94. if re.search(cat+"$", b, re.IGNORECASE): # category must be at end of string
  95. idxcat = b
  96. for b in a['video']['clipsByCategory'][idxcat]['items']:
  97. name = b['title'].replace(u"\u2018", "'").replace(u"\u2019", "'").encode('ascii', 'xmlcharrefreplace')
  98. plot = b['description'].replace(u"\u2018", "'").replace(u"\u2019", "'").encode('ascii', 'xmlcharrefreplace')
  99. vurl = str(b['id']) # mediaID
  100. thumb = b['thumbnail']
  101. fanart = thumb
  102. if b['captions']:
  103. captions = b['captions']['src']
  104. else:
  105. captions = 'N0NE'
  106. infoList = {}
  107. infoList['mediatype'] = 'tvshow'
  108. infoList['Title'] = name
  109. infoList['TVShowTitle'] = name
  110. infoList['Plot'] = plot
  111. infoList['Duration'] = b['duration']
  112. infoList['Aired'] = datetime.datetime.fromtimestamp(b['airDate']/1000).strftime('%Y-%m-%d')
  113. infoList['MPAA'] = captions # Hack to store closed captions
  114. ilist = self.addMenuItem(name, 'GV', ilist, vurl, thumb, fanart, infoList, isFolder=False)
  115. return(ilist)
  116.  
  117. def getAddonVideo(self,url):
  118. # u = url.split('/meta.smil',1)[0]
  119. u = 'https://link.theplatform.com/s/ExhSPC/media/guid/2655402169/' + url
  120. u = u + '/meta.smil'
  121. u = u + '?mbr=true&manifest=m3u&feed=Player%20Selector%20-%20Prod'
  122. html = self.getRequest(u)
  123. u = re.compile('RESOLUTION=1280x720.+?\n(http.+?)\?', re.DOTALL).search(html).group(1)
  124. if u is None:
  125. return
  126. liz = xbmcgui.ListItem(path = u.strip())
  127. captions = xbmc.getInfoLabel('ListItem.MPAA')
  128. #xbmc.log("gAV captions: "+captions, xbmc.LOGNOTICE)
  129. #if captions.find('.srt') > 0:
  130. # liz.setSubtitles([captions])
  131. liz.setSubtitles([captions])
  132. xbmcplugin.setResolvedUrl(int(sys.argv[1]), True, liz)
Add Comment
Please, Sign In to add comment