Advertisement
Guest User

Untitled

a guest
Feb 11th, 2010
989
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.64 KB | None | 0 0
  1. import urllib, lxml.html, os, glob
  2. baseFolder = '/media/ANIME/Anime'
  3. os.chdir(baseFolder)
  4. exceptions = []
  5. for folder in glob.glob('*'):
  6.   if os.path.isdir(folder):
  7.     os.chdir(folder)
  8.     try:
  9.       site = urllib.urlopen("http://www.anime-planet.com/search.php?search_type=all&search="+folder)
  10.       searchSource = bla.read()
  11.       site.close()
  12.       searchPage = lxml.html.document_fromstring(searchSource)
  13.       searchSibling = searchPage.find_class('theme1')[0]
  14.       table = searchSibling.getnext().getnext().getnext()
  15.       firstResultLink = "http://www.anime-planet.com"+table[1][0][0].get("href")
  16.       site = urllib.urlopen(firstResultLink)
  17.       animeSource = blatwo.read()
  18.       site.close()
  19.       animePage = lxml.html.document_fromstring(animeSource)
  20.       infoSibling = animePage.find_class('headerSynopsis')[0]
  21.       description = infoSibling.getnext().text_content().encode('utf-8').strip()
  22.       genres = []
  23.       for child in infoSibling.getparent().getnext()[1]:
  24.         try:
  25.           genres.append(child[0].text_content().encode('utf-8').strip())
  26.         except:
  27.           pass
  28.       title = animePage.get_element_by_id('anime')[2].text_content().encode('utf-8').strip()
  29.       infoTxt = open("Info.txt", 'w')
  30.       infoTxt.write('Anime Information-\nName: '+title+"\nGenres: "+', '.join(genres)+"\nDescription:\n"+description)
  31.       infoTxt.close()
  32.       print 'Finish Folder: '+folder
  33.     except:
  34.       exceptions.append(folder)
  35.       print 'Exception on '+folder
  36.       pass
  37.     os.chdir(baseFolder)
  38. excepttxt = open('exceptions.txt', 'w')
  39. for excepts in exceptions:
  40.   excepttxt.write(excepts+"\n")
  41. excepttxt.close()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement