SHOW:
|
|
- or go back to the newest paste.
| 1 | #!/usr/bin/python | |
| 2 | #This script takes your anime-planet.com username, scrapes your list of watched anime, and writes it as UTF-8 XML to anime-planet.xml | |
| 3 | #It also creates an anime_list.txt file for all the anime that need to be added manually. Note that the code will break if there are | |
| 4 | #special characters involved, so remove the affected anime from your Anime-Planet list first. | |
| 5 | #Additional info and packages: | |
| 6 | - | # In order to successfully import the exported Anime-Planet list to MAL, first export a MAL list, and copy the <myinfo> block just after <myanimelist> |
| 6 | + | |
| 7 | - | |
| 7 | + | |
| 8 | # Trying to make it work again here: http://myanimelist.net/forum/?topicid=135910&show=40 | |
| 9 | - | import urllib2,sys,re,codecs |
| 9 | + | |
| 10 | from bs4 import BeautifulSoup,NavigableString | |
| 11 | - | |
| 11 | + | import urllib.request, urllib.error, urllib.parse,sys,re,codecs |
| 12 | import json | |
| 13 | - | username = raw_input("Enter your username: ")
|
| 13 | + | |
| 14 | print('This script will export your anime-planet.com anime list and saves it to anime-planet.xml')
| |
| 15 | - | html = urllib2.urlopen(baseURL).read() |
| 15 | + | username = input("Enter your username: ")
|
| 16 | baseURL = 'http://www.anime-planet.com/users/'+username+'/anime' | |
| 17 | html = urllib.request.urlopen(baseURL).read() | |
| 18 | html = BeautifulSoup(html) | |
| 19 | - | |
| 19 | + | |
| 20 | delimiter = '\t' | |
| 21 | - | |
| 21 | + | |
| 22 | - | f = codecs.open('anime-planet2.xml', 'w', 'utf-8')
|
| 22 | + | |
| 23 | ||
| 24 | f = codecs.open('anime-planet.xml', 'w', 'utf-8')
| |
| 25 | f.write ('<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n')
| |
| 26 | f.write ('<myanimelist>\n')
| |
| 27 | g = codecs.open('anime_list.txt', 'w', 'utf-8')
| |
| 28 | - | baseURL = 'http://www.anime-planet.com/users/'+username+'/anime?page='+str(i) |
| 28 | + | g.write ('You will have to add these anime manually \n \n')
|
| 29 | - | html = urllib2.urlopen(baseURL).read() |
| 29 | + | |
| 30 | - | html = BeautifulSoup(html) |
| 30 | + | |
| 31 | - | for animeItem in html.findAll('tr')[1:]:
|
| 31 | + | |
| 32 | - | animeItem = BeautifulSoup(animeItem.renderContents()) |
| 32 | + | baseURL = 'http://www.anime-planet.com/users/'+username+'/anime?page='+str(i) |
| 33 | - | animeName = '' + animeItem.a.text |
| 33 | + | html = urllib.request.urlopen(baseURL).read() |
| 34 | - | queryName = re.sub('[^A-Za-z0-9]+', '%20', animeName)
|
| 34 | + | html = BeautifulSoup(html) |
| 35 | - | queryTitle = urllib2.urlopen(queryURL + queryName).read() |
| 35 | + | for animeItem in html.findAll('tr')[1:]:
|
| 36 | - | print(animeName) |
| 36 | + | animeItem = BeautifulSoup(animeItem.renderContents()) |
| 37 | - | search=json.loads(queryTitle.decode('utf8'))
|
| 37 | + | animeName = '' + animeItem.a.text |
| 38 | - | for x in search: |
| 38 | + | queryName = re.sub('[^A-Za-z0-9]+', '%20', animeName)
|
| 39 | - | #print(animeName) |
| 39 | + | queryTitle = urllib.request.urlopen(queryURL + queryName).read() |
| 40 | - | try: |
| 40 | + | print(queryTitle) |
| 41 | - | if animeName.lower()==x["title"].lower(): |
| 41 | + | search=json.loads(queryTitle.decode('utf8'))
|
| 42 | - | animeID=str(x["id"]) |
| 42 | + | for x in search: |
| 43 | - | elif animeName.lower()in [y.lower() for y in x["other_titles"]["english"]]: |
| 43 | + | print(animeName) |
| 44 | - | animeID=str(x["id"]) |
| 44 | + | try: |
| 45 | - | elif animeName.lower() in [j.lower() for j in x["other_titles"]["synonyms"]]: |
| 45 | + | if animeName.lower()==x["title"].lower(): |
| 46 | - | animeID=str(x["id"]) |
| 46 | + | animeID=str(x["id"]) |
| 47 | - | except KeyError as e: |
| 47 | + | elif animeName.lower()in [y.lower() for y in x["other_titles"]["english"]]: |
| 48 | - | pass |
| 48 | + | animeID=str(x["id"]) |
| 49 | - | if animeItem.find('td','tableStatus').text.replace('status box','').replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "")=="Watched":
|
| 49 | + | elif animeName.lower() in [j.lower() for j in x["other_titles"]["synonyms"]]: |
| 50 | - | status="Completed" |
| 50 | + | animeID=str(x["id"]) |
| 51 | - | elif animeItem.find('td','tableStatus').text.replace('status box','').replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "")=="Stalled":
|
| 51 | + | except KeyError as e: |
| 52 | - | status="On-Hold" |
| 52 | + | pass |
| 53 | - | elif animeItem.find('td','tableStatus').text.replace('status box','').replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "")=="WanttoWatch":
|
| 53 | + | |
| 54 | - | status="Plan to Watch" |
| 54 | + | f.write ('\t<anime>\n');
|
| 55 | - | elif animeItem.find('td','tableStatus').text.replace('status box','').replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "")=="Won'tWatch":
|
| 55 | + | f.write ('\t\t<series_animedb_id>'+ animeID +'</series_animedb_id>\n');
|
| 56 | - | status="Dropped" |
| 56 | + | f.write ('\t\t<series_title><![CDATA['+ animeName +']]></series_title>\n');
|
| 57 | - | continue |
| 57 | + | f.write ('\t\t<series_type>' + animeItem.find('td','tableType').text + '</series_type>\n');
|
| 58 | - | else: |
| 58 | + | f.write ('\t\t<my_watched_episodes>'+ animeItem.find('td','tableEps').text.replace(' ','1') +'</my_watched_episodes>\n');
|
| 59 | - | status=animeItem.find('td','tableStatus').text.replace('status box','').replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "")
|
| 59 | + | f.write ('\t\t<my_score>' + str(int(float(animeItem.img['name'])*2)) + '</my_score>\n');
|
| 60 | - | f.write ('\t<anime>\n');
|
| 60 | + | f.write ('\t\t<my_status>' + animeItem.find('td','tableStatus').text.replace('status box','') +'</my_status>\n');
|
| 61 | - | f.write ('\t\t<series_animedb_id>'+ animeID +'</series_animedb_id>\n');
|
| 61 | + | f.write ('\t\t<update_on_import>1</update_on_import>\n');
|
| 62 | - | f.write ('\t\t<series_title><![CDATA['+ animeName +']]></series_title>\n');
|
| 62 | + | f.write ('\t</anime>\n\n');
|
| 63 | - | f.write ('\t\t<series_type>' + animeItem.find('td','tableType').text + '</series_type>\n');
|
| 63 | + | |
| 64 | - | f.write ('\t\t<my_id>0</my_id>\n');
|
| 64 | + | |
| 65 | - | f.write ('\t\t<my_watched_episodes>'+ animeItem.find('td','tableEps').text.replace(' ','1').replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "") +'</my_watched_episodes>\n');
|
| 65 | + | |
| 66 | - | f.write ('\t\t<my_start_date>0000-00-00</my_start_date>\n');
|
| 66 | + | f.write ('</myanimelist>\n')
|
| 67 | - | f.write ('\t\t<my_finish_date>0000-00-00</my_finish_date>\n');
|
| 67 | + |