SHARE
TWEET

Untitled

a guest Apr 24th, 2013 191 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/python
  2. #This script will take your anime-planet.com username and scrape a list of your watched anime in XML utf-8 format to anime-planet.xml
  3. #Will also create an anime_list.txt file for all the anime that needs to be added manually. Note that the code will break if there are
  4. #special characters involved... So remove the anime from your Anime-Planet list first.
  5. #Additional info and packages:
  6. # Python 3.3 - http://python.org/download/
  7. # BeautifulSoup - http://www.crummy.com/software/BeautifulSoup/#Download
  8. # Trying to make it work again here: http://myanimelist.net/forum/?topicid=135910&show=40
  9.  
  10. from bs4 import BeautifulSoup,NavigableString
  11. import urllib.request, urllib.error, urllib.parse,sys,re,codecs
  12. import json
  13.  
  14. print('This script will export your anime-planet.com anime list and saves it to anime-planet.xml')
  15. username = input("Enter your username: ")
  16. baseURL = 'http://www.anime-planet.com/users/'+username+'/anime'
  17. html = urllib.request.urlopen(baseURL).read()
  18. html = BeautifulSoup(html)
  19. pageNumber = int (html.find('li','next').findPrevious('li').next.contents[0])
  20. delimiter = '\t'
  21.  
  22. queryURL = 'http://mal-api.com/anime/search?q='
  23.  
  24. f = codecs.open('anime-planet.xml', 'w', 'utf-8')
  25. f.write ('<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n')
  26. f.write ('<myanimelist>\n')
  27. g = codecs.open('anime_list.txt', 'w', 'utf-8')
  28. g.write ('You will have to add these anime manually \n \n')
  29.  
  30. print('Exporting rough variant of myanimelist format... \n')
  31. for i in range(1,pageNumber+1):
  32.                 baseURL = 'http://www.anime-planet.com/users/'+username+'/anime?page='+str(i)
  33.                 html = urllib.request.urlopen(baseURL).read()
  34.                 html = BeautifulSoup(html)
  35.                 for animeItem in html.findAll('tr')[1:]:
  36.                         animeItem = BeautifulSoup(animeItem.renderContents())
  37.                         animeName = '' + animeItem.a.text
  38.                         queryName = re.sub('[^A-Za-z0-9]+', '%20', animeName)
  39.                         queryTitle = urllib.request.urlopen(queryURL + queryName).read()  
  40.                         print(queryTitle)
  41.                         search=json.loads(queryTitle.decode('utf8'))
  42.                         for x in search:
  43.                                 print(animeName)
  44.                                 try:
  45.                                         if animeName.lower()==x["title"].lower():
  46.                                                 animeID=str(x["id"])
  47.                                         elif animeName.lower()in [y.lower() for y in x["other_titles"]["english"]]:
  48.                                                 animeID=str(x["id"])
  49.                                         elif animeName.lower() in [j.lower() for j in x["other_titles"]["synonyms"]]:
  50.                                                 animeID=str(x["id"])
  51.                                 except KeyError as e:
  52.                                         pass
  53.  
  54.                         f.write ('\t<anime>\n');
  55.                         f.write ('\t\t<series_animedb_id>'+ animeID +'</series_animedb_id>\n');
  56.                         f.write ('\t\t<series_title><![CDATA['+ animeName +']]></series_title>\n');
  57.                         f.write ('\t\t<series_type>' + animeItem.find('td','tableType').text + '</series_type>\n');
  58.                         f.write ('\t\t<my_watched_episodes>'+ animeItem.find('td','tableEps').text.replace('&nbsp;','1') +'</my_watched_episodes>\n');
  59.                         f.write ('\t\t<my_score>' + str(int(float(animeItem.img['name'])*2)) + '</my_score>\n');
  60.                         f.write ('\t\t<my_status>' + animeItem.find('td','tableStatus').text.replace('status box','') +'</my_status>\n');
  61.                         f.write ('\t\t<update_on_import>1</update_on_import>\n');
  62.                         f.write ('\t</anime>\n\n');
  63.  
  64.  
  65.                                
  66. f.write ('</myanimelist>\n')   
  67. print('Done, see anime-planet.xml and anime_list.txt')
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top