View difference between Paste ID: jDqTF25v and Ap67mrXM
SHOW: | | - or go back to the newest paste.
1
#!/usr/bin/python
2
#This script takes your anime-planet.com username, scrapes your list of watched anime, and writes it as UTF-8 XML to anime-planet.xml
3
#It also creates an anime_list.txt file listing the anime that need to be added manually. Note that the code will break if there are
4
#special characters involved, so remove any such anime from your Anime-Planet list first.
5
#Additional info and packages:
6-
# In order to successfully import the exported Anime-Planet list into MAL, first export a MAL list, then copy its <myinfo> block into the exported file just after the <myanimelist> tag
6+
7-
 
7+
8
# Trying to make it work again here: http://myanimelist.net/forum/?topicid=135910&show=40
9-
import urllib2,sys,re,codecs
9+
10
from bs4 import BeautifulSoup,NavigableString
11-
 
11+
import urllib.request, urllib.error, urllib.parse,sys,re,codecs
12
import json
13-
username = raw_input("Enter your username: ")
13+
14
print('This script will export your anime-planet.com anime list and saves it to anime-planet.xml')
15-
html = urllib2.urlopen(baseURL).read()
15+
username = input("Enter your username: ")
16
baseURL = 'http://www.anime-planet.com/users/'+username+'/anime'
17
html = urllib.request.urlopen(baseURL).read()
18
html = BeautifulSoup(html)
19-
 
19+
20
delimiter = '\t'
21-
 
21+
22-
f = codecs.open('anime-planet2.xml', 'w', 'utf-8')
22+
23
24
f = codecs.open('anime-planet.xml', 'w', 'utf-8')
25
f.write ('<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n')
26
f.write ('<myanimelist>\n')
27
g = codecs.open('anime_list.txt', 'w', 'utf-8')
28-
                baseURL = 'http://www.anime-planet.com/users/'+username+'/anime?page='+str(i)
28+
g.write ('You will have to add these anime manually \n \n')
29-
                html = urllib2.urlopen(baseURL).read()
29+
30-
                html = BeautifulSoup(html)
30+
31-
                for animeItem in html.findAll('tr')[1:]:
31+
32-
                        animeItem = BeautifulSoup(animeItem.renderContents())
32+
		baseURL = 'http://www.anime-planet.com/users/'+username+'/anime?page='+str(i)
33-
                        animeName = '' + animeItem.a.text
33+
		html = urllib.request.urlopen(baseURL).read()
34-
                        queryName = re.sub('[^A-Za-z0-9]+', '%20', animeName)
34+
		html = BeautifulSoup(html)
35-
                        queryTitle = urllib2.urlopen(queryURL + queryName).read()  
35+
		for animeItem in html.findAll('tr')[1:]:
36-
                        print(animeName)
36+
			animeItem = BeautifulSoup(animeItem.renderContents())
37-
                        search=json.loads(queryTitle.decode('utf8'))
37+
			animeName = '' + animeItem.a.text
38-
                        for x in search:
38+
			queryName = re.sub('[^A-Za-z0-9]+', '%20', animeName)
39-
                                #print(animeName)
39+
			queryTitle = urllib.request.urlopen(queryURL + queryName).read()   
40-
                                try:
40+
			print(queryTitle)
41-
                                        if animeName.lower()==x["title"].lower():
41+
			search=json.loads(queryTitle.decode('utf8'))
42-
                                                animeID=str(x["id"])
42+
			for x in search:
43-
                                        elif animeName.lower()in [y.lower() for y in x["other_titles"]["english"]]:
43+
				print(animeName)
44-
                                                animeID=str(x["id"])
44+
				try:
45-
                                        elif animeName.lower() in [j.lower() for j in x["other_titles"]["synonyms"]]:
45+
					if animeName.lower()==x["title"].lower():
46-
                                                animeID=str(x["id"])
46+
						animeID=str(x["id"])
47-
                                except KeyError as e:
47+
					elif animeName.lower()in [y.lower() for y in x["other_titles"]["english"]]:
48-
                                        pass
48+
						animeID=str(x["id"])
49-
                        if animeItem.find('td','tableStatus').text.replace('status box','').replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "")=="Watched":
49+
					elif animeName.lower() in [j.lower() for j in x["other_titles"]["synonyms"]]:
50-
                                status="Completed"
50+
						animeID=str(x["id"])
51-
                        elif animeItem.find('td','tableStatus').text.replace('status box','').replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "")=="Stalled":
51+
				except KeyError as e:
52-
                                status="On-Hold"
52+
					pass
53-
                        elif animeItem.find('td','tableStatus').text.replace('status box','').replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "")=="WanttoWatch":
53+
54-
                                status="Plan to Watch" 
54+
			f.write ('\t<anime>\n');
55-
                        elif animeItem.find('td','tableStatus').text.replace('status box','').replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "")=="Won'tWatch":
55+
			f.write ('\t\t<series_animedb_id>'+ animeID +'</series_animedb_id>\n');
56-
                                status="Dropped"
56+
			f.write ('\t\t<series_title><![CDATA['+ animeName +']]></series_title>\n');
57-
                                continue
57+
			f.write ('\t\t<series_type>' + animeItem.find('td','tableType').text + '</series_type>\n');
58-
                        else:
58+
			f.write ('\t\t<my_watched_episodes>'+ animeItem.find('td','tableEps').text.replace('&nbsp;','1') +'</my_watched_episodes>\n');
59-
                                status=animeItem.find('td','tableStatus').text.replace('status box','').replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "")
59+
			f.write ('\t\t<my_score>' + str(int(float(animeItem.img['name'])*2)) + '</my_score>\n');
60-
                        f.write ('\t<anime>\n');
60+
			f.write ('\t\t<my_status>' + animeItem.find('td','tableStatus').text.replace('status box','') +'</my_status>\n');
61-
                        f.write ('\t\t<series_animedb_id>'+ animeID +'</series_animedb_id>\n');
61+
			f.write ('\t\t<update_on_import>1</update_on_import>\n');
62-
                        f.write ('\t\t<series_title><![CDATA['+ animeName +']]></series_title>\n');
62+
			f.write ('\t</anime>\n\n');
63-
                        f.write ('\t\t<series_type>' + animeItem.find('td','tableType').text + '</series_type>\n');
63+
64-
                        f.write ('\t\t<my_id>0</my_id>\n');
64+
65-
                        f.write ('\t\t<my_watched_episodes>'+ animeItem.find('td','tableEps').text.replace('&nbsp;','1').replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "") +'</my_watched_episodes>\n');
65+
				
66-
                        f.write ('\t\t<my_start_date>0000-00-00</my_start_date>\n');
66+
f.write ('</myanimelist>\n')	
67-
                        f.write ('\t\t<my_finish_date>0000-00-00</my_finish_date>\n');
67+