Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/python
# This script takes your anime-planet.com username and scrapes your anime list
# into anime-planet2.xml, using the MyAnimeList XML export layout.
# Additional info and packages:
#   Python 3.3.3 - http://python.org/download/
#   BeautifulSoup - http://www.crummy.com/software/BeautifulSoup/#Download
# Tips:
#   * You can leave your MAL username empty if it is the same as on Anime-Planet.
#   * To install BeautifulSoup, unpack it anywhere and run "python setup.py install"
#     in a console from that folder.
#   * To import the exported Anime-Planet list into MAL successfully, first export
#     your MAL anime list, then copy its <myinfo> block into the generated file
#     just after the <myanimelist> tag.
import base64
import codecs
import getpass
import re
import sys
import urllib.parse
import urllib.request
import xml.etree.ElementTree as et

from bs4 import BeautifulSoup, NavigableString
# ---------------------------------------------------------------------------
# Configuration (module-level names kept from the original script).
# ---------------------------------------------------------------------------
debug = True
delimiter = "\t"
userAgent = "Mozilla/5.0 (Windows NT 6.2; Win64; x64;) Gecko/20100101 Firefox/20.0"
apiURL = "http://myanimelist.net/api/anime/search.xml"
# Single source of truth for the output path, so that the messages shown to
# the user always name the file that is actually written.
outputFile = "anime-planet2.xml"

# Anime-Planet status label (whitespace removed) -> MAL numeric status code.
STATUS_CODES = {
    "Watched": "2",      # Completed
    "Stalled": "3",      # On-Hold
    "WanttoWatch": "6",  # Plan to Watch
    "Won'tWatch": "4",   # Dropped
}
DEFAULT_STATUS = "1"     # Watching

# Non-greedy on purpose: a greedy ".+" would swallow everything between the
# first <synopsis> and the last </synopsis>, i.e. every search result but the
# first one.  ".*?" also copes with an empty <synopsis></synopsis>.
SYNOPSIS_RE = re.compile(r"(?is)<synopsis>.*?</synopsis>")

# Characters removed from scraped table cells.
_CELL_WHITESPACE = {ord(ch): None for ch in "\t\n\r "}

XML_HEADER = (
    "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
    "<myanimelist>\n"
    "\t<myinfo>\n"
    "\t\t<!-- Fill this block from the native MAL export file. -->\n"
    "\t\t<user_export_type>1</user_export_type>\n"
    "\t</myinfo>\n"
)
XML_FOOTER = "</myanimelist>\n"


def squeeze(text):
    """Return *text* with every tab, newline, carriage return and space removed."""
    return text.translate(_CELL_WHITESPACE)


def strip_synopsis(xml_text):
    """Return *xml_text* with each <synopsis>...</synopsis> element removed.

    Each synopsis is removed individually, so the entries that follow the
    first one are preserved.
    """
    return SYNOPSIS_RE.sub("", xml_text)


def map_status(ap_status):
    """Translate an Anime-Planet status label into a MAL status code.

    Unknown labels map to "1" (Watching), as in the original if/elif chain.
    """
    return STATUS_CODES.get(ap_status, DEFAULT_STATUS)


def _field_lower(entry, tag):
    """Return the lower-cased text of child *tag*, or None if absent/empty."""
    node = entry.find(tag)
    if node is None or node.text is None:
        return None
    return node.text.lower()


def find_mal_id(search_xml, anime_name):
    """Return the id of the first search entry that mentions *anime_name*.

    *search_xml* is the XML text of a search response.  An entry matches when
    the lower-cased name is a substring of its <title>, <english> or
    <synonyms> text.  Entries without a usable <id> are skipped.  A missing or
    empty field no longer aborts the entry, so an empty <english/> does not
    hide a match in <synonyms>.

    Returns "" when nothing matches.  Raises xml.etree.ElementTree.ParseError
    when *search_xml* is not well-formed.
    """
    needle = anime_name.lower()
    root = et.fromstring(search_xml)
    for entry in root.findall("./entry"):
        id_node = entry.find("id")
        if id_node is None or id_node.text is None or id_node.text.strip() == "":
            continue
        for tag in ("title", "english", "synonyms"):
            haystack = _field_lower(entry, tag)
            if haystack is not None and needle in haystack:
                return id_node.text
    return ""


def render_anime_xml(anime_id, title, series_type, episodes, score, status):
    """Return one <anime> element (as text) in the MAL export layout.

    All arguments are strings and are inserted verbatim; only the title is
    wrapped in CDATA.
    """
    fields = (
        ("series_animedb_id", anime_id),
        ("series_title", "<![CDATA[" + title + "]]>"),
        ("series_type", series_type),
        ("my_id", "0"),
        ("my_watched_episodes", episodes),
        ("my_start_date", "0000-00-00"),
        ("my_finish_date", "0000-00-00"),
        ("my_fansub_group", "<![CDATA[]]>"),
        ("my_rated", ""),
        ("my_score", score),
        ("my_dvd", ""),
        ("my_storage", ""),
        ("my_status", status),
        ("my_comments", "<![CDATA[]]>"),
        ("my_times_watched", "0"),
        ("my_rewatch_value", ""),
        ("my_downloaded_eps", "0"),
        ("my_tags", "<![CDATA[]]>"),
        ("my_rewatching", "0"),
        ("my_rewatching_ep", "0"),
        ("update_on_import", "1"),
    )
    parts = ["\t<anime>\n"]
    parts.extend("\t\t<%s>%s</%s>\n" % (tag, value, tag) for tag, value in fields)
    parts.append("\t</anime>\n\n")
    return "".join(parts)


def fetch_html(url):
    """Download *url* with the configured User-Agent and parse it with BeautifulSoup."""
    req = urllib.request.Request(url)
    req.add_header("User-Agent", userAgent)
    return BeautifulSoup(urllib.request.urlopen(req).read())


def count_pages(html):
    """Return the number of list pages advertised by the pagination block."""
    next_item = html.find("li", "next")
    if next_item is None:
        # NOTE(review): presumably a list that fits on one page has no "next"
        # pagination item -- confirm against the live site markup.
        return 1
    return int(next_item.findPrevious("li").next.contents[0])


def search_mal(query_url, auth_string):
    """Query the MAL search API and return the response text without synopses."""
    req = urllib.request.Request(query_url)
    req.add_header("Authorization", "Basic %s" % auth_string)
    req.add_header("User-Agent", userAgent)
    body = urllib.request.urlopen(req).read().decode("utf-8")
    return strip_synopsis(body)


def export_row(row, auth_string):
    """Convert one Anime-Planet table row into an <anime> XML snippet.

    Returns None when no MAL id could be determined (such titles have to be
    added manually).  Exits the script if the MAL request itself fails.
    """
    if debug:
        print("================")
    item = BeautifulSoup(row.renderContents())
    anime_name = "" + item.a.text
    query_url = apiURL + "?" + urllib.parse.urlencode({"q": anime_name})

    try:
        response = search_mal(query_url, auth_string)
    except Exception as e:
        print("Request to " + query_url + " failed. " + str(e))
        raise SystemExit(1)
    if debug:
        print(query_url)

    if response == "":
        # Empty body: nothing found, the title must be added manually.
        if debug:
            print("MAL ID = ?")
        return None
    try:
        anime_id = find_mal_id(response, anime_name)
    except (et.ParseError, ValueError) as e:
        print("Decoding of anime data failed. Error: " + str(e))
        return None
    if anime_id == "":
        if debug:
            print("MAL ID = ?")
        return None
    if debug:
        print("MAL ID = " + anime_id)

    status_label = squeeze(item.find("td", "tableStatus").text.replace("status box", ""))
    # NOTE(review): the first replace turns the literal " " into "1"; this
    # looks like it was meant to map a non-breaking-space placeholder cell to
    # one episode -- confirm which character the original script used.
    episodes = squeeze(item.find("td", "tableEps").text.replace(" ", "1"))
    # The rating image's "name" attribute is doubled to fill <my_score>.
    score = str(int(float(item.img["name"]) * 2))

    return render_anime_xml(
        anime_id,
        anime_name,
        item.find("td", "tableType").text,
        episodes,
        score,
        map_status(status_label),
    )


def main():
    """Prompt for credentials, scrape the Anime-Planet list and write the XML file."""
    print("This script will export your anime-planet.com anime list to " + outputFile)
    username = input("Enter your AP username: ")
    malusername = input("Enter your MAL username: ")
    if malusername == "":
        malusername = username
    # getpass keeps the password off the screen.
    malpassword = getpass.getpass("Enter your MAL password: ")

    baseURL = "http://www.anime-planet.com/users/%s/anime" % username
    passStr = ("%s:%s" % (malusername, malpassword)).replace("\n", "")
    authString = base64.b64encode(passStr.encode("utf-8")).decode("utf-8")
    if debug:
        # The encoded credentials are reversible, so they are not printed.
        print("MAL authorization prepared for user: " + malusername)

    try:
        page_count = count_pages(fetch_html(baseURL))
    except Exception as e:
        print("Request to " + baseURL + " failed. " + str(e))
        raise SystemExit(1)

    # "with" guarantees the file is flushed and closed, even on early exit.
    with codecs.open(outputFile, "w", "utf-8") as f:
        f.write(XML_HEADER)
        print("Exporting rough variant of myanimelist format...")
        for page in range(1, page_count + 1):
            page_url = baseURL + "?" + urllib.parse.urlencode({"page": str(page)})
            try:
                html = fetch_html(page_url)
            except Exception as e:
                print("Request to " + page_url + " failed. " + str(e))
                raise SystemExit(1)
            # The first table row is skipped, as in the original script.
            for row in html.findAll("tr")[1:]:
                anime_xml = export_row(row, authString)
                if anime_xml is not None:
                    f.write(anime_xml)
        f.write(XML_FOOTER)
    print("Done, see " + outputFile)


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement