Advertisement
Fishi

AP scraper + MAL IDs

Apr 23rd, 2013
720
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.90 KB | None | 0 0
  1. #!/usr/bin/python
  2. #This script takes your anime-planet.com username, scrapes your anime list, and writes it as UTF-8 XML (MyAnimeList export format) to anime-planet2.xml
  3. #Additional info and packages:
  4. # Python 2.7 - http://python.org/download/ (the code uses urllib2 and raw_input, which exist only in Python 2)
  5. # BeautifulSoup - http://www.crummy.com/software/BeautifulSoup/#Download
  6. # To import the exported Anime-Planet list into MAL, first export a list from MAL and copy its <myinfo> block into the generated file, just after the opening <myanimelist> tag
  7.  
  8. from bs4 import BeautifulSoup,NavigableString
  9. import urllib2,sys,re,codecs
  10. import json
  11.  
  12. print('This script will export your anime-planet.com anime list and saves it to anime-planet.xml')
  13. username = raw_input("Enter your username: ")
  14. baseURL = 'http://www.anime-planet.com/users/'+username+'/anime'
  15. html = urllib2.urlopen(baseURL).read()
  16. html = BeautifulSoup(html)
  17. pageNumber = int (html.find('li','next').findPrevious('li').next.contents[0])
  18. delimiter = '\t'
  19.  
  20. queryURL = 'http://mal-api.com/anime/search?q='
  21.  
  22. f = codecs.open('anime-planet2.xml', 'w', 'utf-8')
  23. f.write ('<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n')
  24. f.write ('<myanimelist>\n')
  25.  
  26. print('Exporting rough variant of myanimelist format... \n')
  27. for i in range(1,pageNumber+1):
  28. baseURL = 'http://www.anime-planet.com/users/'+username+'/anime?page='+str(i)
  29. html = urllib2.urlopen(baseURL).read()
  30. html = BeautifulSoup(html)
  31. for animeItem in html.findAll('tr')[1:]:
  32. animeItem = BeautifulSoup(animeItem.renderContents())
  33. animeName = '' + animeItem.a.text
  34. queryName = re.sub('[^A-Za-z0-9]+', '%20', animeName)
  35. queryTitle = urllib2.urlopen(queryURL + queryName).read()
  36. print(animeName)
  37. search=json.loads(queryTitle.decode('utf8'))
  38. for x in search:
  39. #print(animeName)
  40. try:
  41. if animeName.lower()==x["title"].lower():
  42. animeID=str(x["id"])
  43. elif animeName.lower()in [y.lower() for y in x["other_titles"]["english"]]:
  44. animeID=str(x["id"])
  45. elif animeName.lower() in [j.lower() for j in x["other_titles"]["synonyms"]]:
  46. animeID=str(x["id"])
  47. except KeyError as e:
  48. pass
  49. if animeItem.find('td','tableStatus').text.replace('status box','').replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "")=="Watched":
  50. status="Completed"
  51. elif animeItem.find('td','tableStatus').text.replace('status box','').replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "")=="Stalled":
  52. status="On-Hold"
  53. elif animeItem.find('td','tableStatus').text.replace('status box','').replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "")=="WanttoWatch":
  54. status="Plan to Watch"
  55. elif animeItem.find('td','tableStatus').text.replace('status box','').replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "")=="Won'tWatch":
  56. status="Dropped"
  57. continue
  58. else:
  59. status=animeItem.find('td','tableStatus').text.replace('status box','').replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "")
  60. f.write ('\t<anime>\n');
  61. f.write ('\t\t<series_animedb_id>'+ animeID +'</series_animedb_id>\n');
  62. f.write ('\t\t<series_title><![CDATA['+ animeName +']]></series_title>\n');
  63. f.write ('\t\t<series_type>' + animeItem.find('td','tableType').text + '</series_type>\n');
  64. f.write ('\t\t<my_id>0</my_id>\n');
  65. f.write ('\t\t<my_watched_episodes>'+ animeItem.find('td','tableEps').text.replace('&nbsp;','1').replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "") +'</my_watched_episodes>\n');
  66. f.write ('\t\t<my_start_date>0000-00-00</my_start_date>\n');
  67. f.write ('\t\t<my_finish_date>0000-00-00</my_finish_date>\n');
  68. f.write ('\t\t<my_fansub_group><![CDATA[]]></my_fansub_group>\n');
  69. f.write ('\t\t<my_rated></my_rated>\n');
  70. f.write ('\t\t<my_score>' + str(int(float(animeItem.img['name'])*2)).replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "") + '</my_score>\n');
  71. f.write ('\t\t<my_dvd></my_dvd>\n');
  72. f.write ('\t\t<my_storage></my_storage>\n');
  73. f.write ('\t\t<my_status>' + status +'</my_status>\n');
  74. f.write ('\t\t<my_comments><![CDATA[]]></my_comments>\n');
  75. f.write ('\t\t<my_times_watched>0</my_times_watched>\n');
  76. f.write ('\t\t<my_rewatch_value></my_rewatch_value>\n');
  77. f.write ('\t\t<my_downloaded_eps>0</my_downloaded_eps>\n');
  78. f.write ('\t\t<my_tags><![CDATA[]]></my_tags>\n');
  79. f.write ('\t\t<my_rewatching>0</my_rewatching>\n');
  80. f.write ('\t\t<my_rewatching_ep>0</my_rewatching_ep>\n');
  81. f.write ('\t\t<update_on_import>1</update_on_import>\n');
  82. f.write ('\t</anime>\n\n');
  83.  
  84.  
  85.  
  86. f.write ('</myanimelist>\n')
  87. print('Done, see anime-planet.xml and anime_list.txt')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement