Advertisement
Guest User

Untitled

a guest
Dec 4th, 2013
148
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 7.36 KB | None | 0 0
  1. #!/usr/bin/python
  2. #This script takes your anime-planet.com username and exports your anime list (all statuses) from that site to anime-planet2.xml
  3. #Additional info and packages:
  4. #  Python 3.3.3 - http://python.org/download/
  5. #  BeautifulSoup - http://www.crummy.com/software/BeautifulSoup/#Download
  6. #Tips:
  7. # * You can leave your MAL username empty if it's the same as on AnimePlanet.
  8. # * To install BeautifulSoup unpack it anywhere and type "setup.py install" in the console from that folder.
  9. # * To import the exported Anime-Planet list into MAL, first export your anime list from MAL,
  10. #    then copy the <myinfo> block of that export into the generated file, right after the <myanimelist> tag.
  11.  
  12. from bs4 import BeautifulSoup,NavigableString
  13. import urllib.request,urllib.parse,base64,sys,re,codecs
  14. import xml.etree.ElementTree as et
  15.  
  16. debug = True
  17. delimiter = "\t"
  18. userAgent = "Mozilla/5.0 (Windows NT 6.2; Win64; x64;) Gecko/20100101 Firefox/20.0"
  19.  
  20. print("This script will export your anime-planet.com anime list to anime-planet.xml")
  21. username = input("Enter your AP username: ")
  22. malusername = input("Enter your MAL username: ")
  23. if (malusername == ""): malusername = username
  24. malpassword = input("Enter your MAL password: ")
  25.  
  26. baseURL = "http://www.anime-planet.com/users/%s/anime" % username
  27. apiURL = "http://myanimelist.net/api/anime/search.xml"
  28.  
  29. passStr = str("%s:%s" % (malusername, malpassword)).replace("\n", "")
  30. authString = str(base64.b64encode(bytes(passStr, "utf-8")), "utf-8")
  31. if debug: print("MAL authorization hash: " + authString)
  32.  
  33. try:
  34.     req = urllib.request.Request(baseURL)
  35.     req.add_header("User-Agent",  userAgent)
  36.     html = BeautifulSoup(urllib.request.urlopen(req).read())
  37.     pageNumber = int (html.find("li","next").findPrevious("li").next.contents[0])
  38. except BaseException as e:
  39.     print("Request to " + baseURL + " failed. " +str(e))
  40.     raise SystemExit
  41.  
  42. f = codecs.open("anime-planet2.xml", "w", "utf-8")
  43.  
  44. xmlData = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
  45. xmlData += "<myanimelist>\n"
  46. xmlData += "\t<myinfo>\n\t\t<!-- Fill this block from the native MAL export file. -->\n\t\t<user_export_type>1</user_export_type>\n\t</myinfo>\n"
  47. f.write(xmlData)
  48.  
  49. print("Exporting rough variant of myanimelist format...")
  50.  
  51. for i in range(1,pageNumber+1):
  52.     try:
  53.         req = urllib.request.Request(baseURL + "?" + urllib.parse.urlencode({"page": str(i)}))
  54.         req.add_header("User-Agent",  userAgent)
  55.         html = BeautifulSoup(urllib.request.urlopen(req).read())
  56.     except BaseException as e:
  57.             print("Request to " + baseURL + "?" + urllib.parse.urlencode({"page": str(i)}) + " failed. " +str(e))
  58.             raise SystemExit
  59.     for animeItem in html.findAll("tr")[1:]:
  60.         if debug: print("================")
  61.  
  62.         animeItem = BeautifulSoup(animeItem.renderContents())
  63.         animeName = "" + animeItem.a.text
  64.         queryTitle = ""
  65.         try:
  66.             titlereq = urllib.request.Request(apiURL + "?" + urllib.parse.urlencode({ "q" : animeName }))
  67.             titlereq.add_header("Authorization", "Basic %s" % authString)  
  68.             titlereq.add_header("User-Agent",  userAgent)
  69.             queryTitle = urllib.request.urlopen(titlereq).read().decode("utf-8")
  70.             queryTitle = re.sub(r"(?is)<synopsis>.+</synopsis>", "", queryTitle)
  71.         except BaseException as e:
  72.             print("Request to " + apiURL + "?" + urllib.parse.urlencode({ "q" : animeName }) + " failed. " +str(e))
  73.             raise SystemExit
  74.         search = ""
  75.         if debug:
  76.             print(apiURL + "?" + urllib.parse.urlencode({ "q" : animeName }))
  77.         try:
  78.             if queryTitle != '':
  79.                 search = et.fromstring(queryTitle)
  80.             else:
  81.                 # for adding anime manually
  82.                 if debug: print("MAL ID = ?")
  83.                 continue
  84.         except BaseException as e:
  85.             print("Decoding of anime data failed. Error: " +str(e))
  86.             # for adding anime manually
  87.             continue
  88.         localName = animeName.lower()
  89.         animeID = ""
  90.         for entry in search.findall("./entry"):
  91.             try:
  92.                 if entry.find("id") is not None and entry.find("id").text.strip()!="":
  93.                     if entry.find("title") is not None and localName in entry.find("title").text.lower():
  94.                         animeID=entry.find("id").text
  95.                         break
  96.                     elif entry.find("english") is not None and localName in entry.find("english").text.lower():
  97.                         animeID=entry.find("id").text
  98.                         break
  99.                     elif entry.find("synonyms") is not None and localName in entry.find("synonyms").text.lower():
  100.                         animeID=entry.find("id").text
  101.                         break
  102.             except:
  103.                 continue
  104.         if animeID=="":
  105.             if debug: print("MAL ID = ?")
  106.             # for adding anime manually
  107.             continue
  108.         if debug: print("MAL ID = " + animeID)
  109.         animeStatus = animeItem.find("td","tableStatus").text.replace("status box","").replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "");
  110.         if animeStatus=="Watched":
  111.             status="2"
  112.             #"Completed"
  113.         elif animeStatus=="Stalled":
  114.             status="3"
  115.             #"On-Hold"
  116.         elif animeStatus=="WanttoWatch":
  117.             status="6"
  118.             #"Plan to Watch"
  119.         elif animeStatus=="Won'tWatch":
  120.             status="4"
  121.             #"Dropped"
  122.         else:
  123.             status="1"
  124.             #"Watching"
  125.             #status=animeStatus
  126.  
  127.         xmlData = "\t<anime>\n"
  128.         xmlData += "\t\t<series_animedb_id>"+ animeID +"</series_animedb_id>\n"
  129.         xmlData += "\t\t<series_title><![CDATA["+ animeName +"]]></series_title>\n"
  130.         xmlData += "\t\t<series_type>" + animeItem.find("td","tableType").text + "</series_type>\n"
  131.         xmlData += "\t\t<my_id>0</my_id>\n"
  132.         xmlData += "\t\t<my_watched_episodes>"+ animeItem.find("td","tableEps").text.replace("&nbsp;","1").replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "") +"</my_watched_episodes>\n"
  133.         xmlData += "\t\t<my_start_date>0000-00-00</my_start_date>\n"
  134.         xmlData += "\t\t<my_finish_date>0000-00-00</my_finish_date>\n"
  135.         xmlData += "\t\t<my_fansub_group><![CDATA[]]></my_fansub_group>\n"
  136.         xmlData += "\t\t<my_rated></my_rated>\n"
  137.         xmlData += "\t\t<my_score>" + str(int(float(animeItem.img["name"])*2)).replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "") + "</my_score>\n"
  138.         xmlData += "\t\t<my_dvd></my_dvd>\n"
  139.         xmlData += "\t\t<my_storage></my_storage>\n"
  140.         xmlData += "\t\t<my_status>" + status +"</my_status>\n"
  141.         xmlData += "\t\t<my_comments><![CDATA[]]></my_comments>\n"
  142.         xmlData += "\t\t<my_times_watched>0</my_times_watched>\n"
  143.         xmlData += "\t\t<my_rewatch_value></my_rewatch_value>\n"
  144.         xmlData += "\t\t<my_downloaded_eps>0</my_downloaded_eps>\n"
  145.         xmlData += "\t\t<my_tags><![CDATA[]]></my_tags>\n"
  146.         xmlData += "\t\t<my_rewatching>0</my_rewatching>\n"
  147.         xmlData += "\t\t<my_rewatching_ep>0</my_rewatching_ep>\n"
  148.         xmlData += "\t\t<update_on_import>1</update_on_import>\n"
  149.         xmlData += "\t</anime>\n\n"
  150.         f.write(xmlData)
  151.                    
  152. xmlData = "</myanimelist>\n"
  153. f.write(xmlData)
  154.  
  155. print("Done, see anime-planet.xml")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement