Fishi

Anime Planet XML List Exporter Fail noob mod

Apr 22nd, 2013
#!/usr/bin/python
# This script takes your anime-planet.com username and scrapes your watched anime list,
# saving it as UTF-8 XML to anime-planet.xml.
# It also creates an anime_list.txt file listing the anime that have to be added manually.
# Note that the script will break if special characters are involved, so remove those anime
# from your Anime-Planet list first.
# Additional info and packages:
#   Python 2.7 - http://python.org/download/
#   BeautifulSoup - http://www.crummy.com/software/BeautifulSoup/#Download

from bs4 import BeautifulSoup, NavigableString
import urllib2, sys, re, codecs

print 'This script will export your anime-planet.com anime list and save it to anime-planet.xml'
username = raw_input("Enter your username: ")
baseURL = 'http://www.anime-planet.com/users/' + username + '/anime'
html = urllib2.urlopen(baseURL).read()
html = BeautifulSoup(html)
# Number of list pages, read from the pagination link just before the "next" link.
pageNumber = int(html.find('li', 'next').findPrevious('li').next.contents[0])
delimiter = '\t'

# mal-api.com search endpoint, used to look up each title's MyAnimeList ID.
queryURL = 'http://mal-api.com/anime/search?q='

f = codecs.open('anime-planet.xml', 'w', 'utf-8')
f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
f.write('<myanimelist>\n')
animeList = []
g = codecs.open('anime_list.txt', 'w', 'utf-8')
g.write('You will have to add these anime manually\n\n')

print 'Exporting rough variant of myanimelist format...'
for i in range(1, pageNumber + 1):
    baseURL = 'http://www.anime-planet.com/users/' + username + '/anime?page=' + str(i)
    html = urllib2.urlopen(baseURL).read()
    html = BeautifulSoup(html)
    # Skip the header row; every remaining table row is one anime entry.
    for animeItem in html.findAll('tr')[1:]:
        animeItem = BeautifulSoup(animeItem.renderContents())
        animeName = '' + animeItem.a.text
        queryTitle = urllib2.urlopen(queryURL + animeName.replace(" ", "_")).read()
        f.write('\t<anime>\n')
        # Crude parse of the search response; assumes it starts with [{"id":<number>,...
        animeID = queryTitle[7:queryTitle.find(',')]
        if animeID == '':
            # No MyAnimeList ID found: log the title so it can be added manually.
            animeList.append(animeName)
            print animeName
            g.write("%s" % animeName + '\n')

        f.write('\t\t<series_animedb_id>' + animeID + '</series_animedb_id>\n')
        f.write('\t\t<series_title><![CDATA[' + animeItem.a.text + ']]></series_title>\n')
        f.write('\t\t<series_type>' + animeItem.find('td', 'tableType').text + '</series_type>\n')
        f.write('\t\t<my_watched_episodes>' + animeItem.find('td', 'tableEps').text.replace('&nbsp;', '1') + '</my_watched_episodes>\n')
        f.write('\t\t<my_score>' + str(int(float(animeItem.img['name']) * 2)) + '</my_score>\n')
        f.write('\t\t<my_status>' + animeItem.find('td', 'tableStatus').text.replace('status box', '') + '</my_status>\n')
        #f.write('\t\t<update_on_import>1</update_on_import>\n')
        f.write('\t</anime>\n\n')

# Close the root element and the output files so the XML is well-formed.
f.write('</myanimelist>\n')
f.close()
g.close()

print 'Done, see anime-planet.xml and anime_list.txt'
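
For reference, each entry produced by the f.write calls above follows MyAnimeList's XML import layout, so the resulting anime-planet.xml is meant to be uploaded through MAL's list import page. A single entry would look roughly like the sketch below; the ID, title, and values are illustrative placeholders, and the status text is simply whatever appears in Anime-Planet's status column.

    <anime>
        <series_animedb_id>1</series_animedb_id>
        <series_title><![CDATA[Cowboy Bebop]]></series_title>
        <series_type>TV</series_type>
        <my_watched_episodes>26</my_watched_episodes>
        <my_score>8</my_score>
        <my_status>Watched</my_status>
    </anime>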