Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Python 2 script (it relies on ``urllib.urlopen`` / ``urllib.quote_plus``).
#
# For every sub-folder of ``baseFolder`` it searches anime-planet.com for the
# folder name, scrapes the first search result and writes the title, genres
# and description to ``Info.txt`` inside that folder.  Folders that fail for
# any reason are listed, one per line, in ``exceptions.txt`` in ``baseFolder``.
import glob
import os
import urllib

import lxml.html

baseFolder = '/media/ANIME/Anime'
SITE_ROOT = "http://www.anime-planet.com"
SEARCH_URL = SITE_ROOT + "/search.php?search_type=all&search="


def _fetch(url):
    """Return the raw response body of *url*.

    The connection is closed even when ``read()`` raises.
    """
    site = urllib.urlopen(url)
    try:
        return site.read()
    finally:
        site.close()


def _text(element):
    """Return the UTF-8 encoded, whitespace-stripped text of *element*."""
    return element.text_content().encode('utf-8').strip()


def _first_result_url(folder):
    """Return the absolute URL of the first search hit for *folder*.

    The folder name is URL-encoded so that characters such as ``&``, ``#``
    or ``+`` cannot corrupt the query string.

    Raises whatever lxml / indexing raises when the page does not have the
    expected structure; the caller treats that as a failed folder.
    """
    search_page = lxml.html.document_fromstring(
        _fetch(SEARCH_URL + urllib.quote_plus(folder)))
    search_sibling = search_page.find_class('theme1')[0]
    # NOTE(review): assumes the results table is the third sibling after the
    # first 'theme1' element and that table[1][0][0] is the link of the first
    # result -- confirm against the current site markup.
    table = search_sibling.getnext().getnext().getnext()
    return SITE_ROOT + table[1][0][0].get("href")


def _scrape_anime(url):
    """Return ``(title, genres, description)`` scraped from the page at *url*.

    All three values are UTF-8 byte strings (``genres`` is a list of them).
    """
    anime_page = lxml.html.document_fromstring(_fetch(url))
    info_sibling = anime_page.find_class('headerSynopsis')[0]
    description = _text(info_sibling.getnext())

    genres = []
    # NOTE(review): assumes the genre list is the second child of the element
    # following the synopsis container -- confirm against the site markup.
    for child in info_sibling.getparent().getnext()[1]:
        try:
            genres.append(_text(child[0]))
        except Exception:
            # Best effort: entries without the expected first child are
            # skipped instead of failing the whole folder.
            continue

    title = _text(anime_page.get_element_by_id('anime')[2])
    return title, genres, description


def _write_info(path, title, genres, description):
    """Write the scraped information to the file at *path*."""
    with open(path, 'w') as info_txt:
        info_txt.write('Anime Information-\nName: ' + title
                       + "\nGenres: " + ', '.join(genres)
                       + "\nDescription:\n" + description)


os.chdir(baseFolder)
exceptions = []
for folder in glob.glob('*'):
    if not os.path.isdir(folder):
        continue
    try:
        title, genres, description = _scrape_anime(_first_result_url(folder))
        # The working directory stays at baseFolder, so address the file
        # through the folder instead of chdir-ing in and out of it.
        _write_info(os.path.join(folder, "Info.txt"),
                    title, genres, description)
    except Exception:
        # Per-folder boundary: record the failure and keep going.  Unlike a
        # bare ``except:`` this still lets Ctrl-C / SystemExit stop the run.
        exceptions.append(folder)
        print('Exception on ' + folder)
    else:
        print('Finish Folder: ' + folder)

with open('exceptions.txt', 'w') as excepttxt:
    for excepts in exceptions:
        excepttxt.write(excepts + "\n")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement