Guest User

youtube parser v5

a guest
Jan 7th, 2015
239
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.16 KB | None | 0 0
  1. #!/usr/bin/python
  2. print "Content-type: text/html\n\n"
  3. # -*- coding:Utf-8 -*-
  4. import urllib2
  5.  
  6.  
  7. def check_channel_update(new_data, channel):
  8.     fichier = open('video.txt', 'r')
  9.     data = fichier.read()
  10.     fichier.close()
  11.     base_url = 'http://www.youtube.com'
  12.     data = data.split('\n\n')[:-1] #do not forget to add delimiter at end of each channel !
  13.     channel_old_data = ''
  14.  
  15.     for element in data:
  16.         if channel in element:
  17.             channel_old_data = data.pop(data.index(element))
  18.             break
  19.  
  20.     if not channel_old_data : return False
  21.  
  22.     channel_new_data = ''
  23.     for new_video in new_data:
  24.         if not new_video in channel_old_data:
  25.             print 'new video avalaible'
  26.             channel_new_data += base_url+new_video+' '+title[new_data.index(new_video)]+'\n'
  27.         else:
  28.             break #leave loop cause no new video after that
  29.  
  30.     if len(channel_new_data):
  31.         before, after = channel_old_data.split('\n',1)
  32.         update =  before +'\n'+ channel_new_data + after
  33.         data.insert(0, update)
  34.         data = '\n\n'.join(data)
  35.         data += '\n\n'
  36.  
  37.         write_data(data)
  38.  
  39. def write_data(data):
  40.     fichier = open('video.txt', 'w')
  41.     fichier.write(data)
  42.     fichier.close()
  43.  
  44. def parse_html_source(url):
  45.     data=urllib2.urlopen(url)
  46.     data = data.read()
  47.  
  48.     start_tag = '<h3 class="yt-lockup-title">'
  49.     data = [item.split('</h3>')[0] for item in data.split(start_tag)[1:]]
  50.     title = [item.split('title="')[1].split('"  aria-describedby')[0] for item in data]
  51.     link = [item.split('href="')[1].split('">')[0] for item in data]
  52.  
  53.     return title, link
  54.  
  55.  
  56. def update_channel_file(): #used often
  57.     fichier = open('channel.txt', 'r')
  58.     data  = fichier.readlines()
  59.     fichier.close()
  60.  
  61.     name_channel_update = data.pop(0)
  62.     data.append(name_channel_update)
  63.  
  64.     fichier = open('channel.txt', 'w')
  65.     [fichier.write(item) for item in data]
  66.     fichier.close()
  67.     print name_channel_update
  68.     return name_channel_update.split(' ',1)[0]
  69.  
  70.  
# Script entry point: runs on every execution of the file.
# Pick the channel to process; this call also rewrites channel.txt so that
# the channel moves to the end of the list.
url_channel = update_channel_file()

# `title` has to remain a module-level name: check_channel_update reads it
# as a global when it builds the lines for newly found videos.
title, link = parse_html_source(url_channel)
# Add any links not yet recorded for this channel to video.txt.
check_channel_update(link, url_channel)
Advertisement
Add Comment
Please, Sign In to add comment