Advertisement
Guest User

ImportmangalistfromAPtoMAL

a guest
Apr 12th, 2017
90
1
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 11.60 KB | None | 1 0
  1. #!/usr/bin/python
  2. #This script will take your anime-planet.com username and add your manga list to MAL using its API
  3. #Errors are output so you can enter those manually
  4. #set debug = True to get more information on all of your entries
  5. #Additional info and packages:
  6. #  Python 3.3.3 - http://python.org/download/
  7. #  BeautifulSoup4 - http://www.crummy.com/software/BeautifulSoup/#Download
  8. #Tips:
  9. # * You can leave your MAL username empty if it's the same as on anime-planet.com.
  10. # * To install BeautifulSoup unpack it anywhere and type "setup.py install" in the console from that folder.
  11. # * In order to successfully import the exported anime-Planet mangalist to MAL, first export MAL mangalist,
  12. #    and copy the <myinfo> block just after <myanimelist> tag.
  13.  
  14. from bs4 import BeautifulSoup,NavigableString
  15. import urllib.request,urllib.parse,base64,sys,re,codecs
  16. import xml.etree.ElementTree as et
  17.  
  18. debug = False
  19. delimiter = "\t"
  20. userAgent = "Mozilla/5.0 (Windows NT 6.2; Win64; x64;) Gecko/20100101 Firefox/20.0"
  21.  
  22. print("This script will export your anime-planet.com anime list to myanimelist.net")
  23.  
  24. username = input("Enter your AP username:")
  25. malusername = input("Enter your MAL username:")
  26. if (malusername == ""): malusername = username
  27. malpassword = input("Enter your MAL password:")
  28.  
  29. baseURL = "http://www.anime-planet.com/users/%s/manga" %username
  30. apiURL = "https://myanimelist.net/api/manga/search.xml"
  31. apiURLadd = "https://myanimelist.net/api/mangalist/add/%s.xml"
  32. apiURLupdate = "https://myanimelist.net/api/mangalist/update/%s.xml"
  33.  
  34. passStr = str("%s:%s" % (malusername, malpassword)).replace("\n", "")
  35. authString = str(base64.b64encode(bytes(passStr, "utf-8")), "utf-8")
  36. if debug: print("MAL authorization hash:" + authString)
  37.  
  38. #Try to get HTML of first page.
  39. try:
  40.     req = urllib.request.Request(baseURL)
  41.     req.add_header("User-Agent",  userAgent)
  42.     html = BeautifulSoup(urllib.request.urlopen(req).read(), "html.parser")
  43.     pageNumber = int (html.find("li","next").findPrevious("li").next.contents[0])
  44.     #if your list is only one page, uncomment the line below and comment the line above
  45.     #pageNumber = int (html.find('li','next').findPrevious('li').contents[0])
  46. except BaseException as e:
  47.     print("Request to " + baseURL + " failed. " +str(e))
  48.     raise SystemExit
  49.  
  50. print("Processing AP list and requesting data from MAL...")
  51.  
  52. #loop through all of your pages
  53. for i in range(1,pageNumber+1):
  54.     try:
  55.         req = urllib.request.Request(baseURL + "?" + urllib.parse.urlencode({"page": str(i)}))
  56.         if debug: print("Calling URL:" + baseURL + "?" + urllib.parse.urlencode({"page": str(i)}))
  57.         req.add_header("User-Agent",  userAgent)
  58.         html = BeautifulSoup(urllib.request.urlopen(req).read(), "html.parser")
  59.     except BaseException as e:
  60.             print("Request to " + baseURL + "?" + urllib.parse.urlencode({"page": str(i)}) + " failed. " +str(e))
  61.             raise SystemExit
  62.     #loop through all of the manga posters on page i
  63.     for mangaItem in html.findAll("li",class_="card"):
  64.         mangaItem = BeautifulSoup(mangaItem.renderContents(), "html.parser")
  65.         mangaName = "" + mangaItem.a.div.img["alt"]
  66.         #pretty apostophe was breaking things
  67.         mangaName = mangaName.replace("'","'")
  68.         queryTitle = ""
  69.         try:
  70.             titlereq = urllib.request.Request(apiURL + "?" + urllib.parse.urlencode({ "q" : mangaName }))
  71.             titlereq.add_header("Authorization", "Basic %s" % authString)
  72.             titlereq.add_header("User-Agent",  userAgent)
  73.             queryTitle = urllib.request.urlopen(titlereq).read().decode("utf-8")
  74.             #I think this removes the synopsis for some reason, whatever
  75.             queryTitle = re.sub(r"(?is)<synopsis>.+</synopsis>", "", queryTitle)
  76.         except BaseException as e:
  77.             print("manga: " + mangaName)
  78.             print("Request to " + apiURL + "?" + urllib.parse.urlencode({ "q" : mangaName }) + " failed. " +str(e))
  79.             raise SystemExit
  80.         #get the status, which is now a class name
  81.         status = mangaItem.find("div","statusArea").span["class"][0]
  82.         formattedStatus = ""
  83.         if status=="status6":
  84.             formattedStatus = "won't read"
  85.             status="4"
  86.         elif status=="status3":
  87.             formattedStatus = "dropped"
  88.             status="4"
  89.         elif status=="status4":
  90.             formattedStatus = "want to read"
  91.             status="6"
  92.         elif status=="status5":
  93.             formattedStatus = "stalled"
  94.             status="3"
  95.         elif status=="status1":
  96.             formattedStatus = "read"
  97.             status="2"
  98.         elif status=="status2":
  99.             formattedStatus = "reading"
  100.             status="1"
  101.         search = ""
  102.         try:
  103.             if queryTitle != '':
  104.                 search = et.fromstring(queryTitle)
  105.                 if debug: print("================")
  106.                 if debug: print("manga: " + mangaName)
  107.                 if debug: print(apiURL + "?" + urllib.parse.urlencode({ "q" : mangaName }))
  108.             else:
  109.                 # This item failed to get a title match
  110.                 if ":" not in mangaName:
  111.                     print("================")
  112.                     print("manga: " + mangaName)
  113.                     print(apiURL + "?" + urllib.parse.urlencode({ "q" : mangaName }))
  114.                     print("Search failed; no match found.")
  115.                     print("Status: " + formattedStatus)
  116.                     continue
  117.                 else:
  118.                     #try truncated name for initial search
  119.                     formattedName = mangaName.split(":")[0]
  120.                     try:
  121.                         titlereq = urllib.request.Request(apiURL + "?" + urllib.parse.urlencode({ "q" : formattedName }))
  122.                         titlereq.add_header("Authorization", "Basic %s" % authString)
  123.                         titlereq.add_header("User-Agent",  userAgent)
  124.                         queryTitle = urllib.request.urlopen(titlereq).read().decode("utf-8")
  125.                         #I think this removes the synopsis for some reason, whatever
  126.                         queryTitle = re.sub(r"(?is)<synopsis>.+</synopsis>", "", queryTitle)
  127.                     except BaseException as e:
  128.                         print("manga: " + mangaName)
  129.                         print("Request to " + apiURL + "?" + urllib.parse.urlencode({ "q" : formattedName }) + " failed. " +str(e))
  130.                         raise SystemExit
  131.                     if queryTitle != '':
  132.                         search = et.fromstring(queryTitle)
  133.                         if debug: print("================")
  134.                         if debug: print("manga: " + mangaName)
  135.                         if debug: print(apiURL + "?" + urllib.parse.urlencode({ "q" : formattedName }))
  136.                     else:
  137.                         # This item failed to get a title match
  138.                         print("================")
  139.                         print("manga: " + mangaName)
  140.                         print(apiURL + "?" + urllib.parse.urlencode({ "q" : formattedName }))
  141.                         print("Search failed; no match found.")
  142.                         print("Status: " + formattedStatus)
  143.                         continue
  144.                 continue
  145.         except BaseException as e:
  146.             print("Decoding of manga data failed. Error: " +str(e))
  147.             # for adding manga manually
  148.             continue
  149.         localName = mangaName.lower().replace(":","").replace("(","").replace(")","")
  150.         mangaID = ""
  151.         chapterCount = ""
  152.         #check all results for an id
  153.         for entry in search.findall("./entry"):
  154.             try:
  155.                 if entry.find("id") is not None and entry.find("id").text.strip()!="":
  156.                     if entry.find("title") is not None and localName in entry.find("title").text.lower().replace(":","").replace("(","").replace(")",""):
  157.                         mangaID=entry.find("id").text
  158.                         chapterCount = entry.find("chapters").text
  159.                         break
  160.                     elif entry.find("english") is not None and localName in entry.find("english").text.lower().replace(":","").replace("(","").replace(")",""):
  161.                         mangaID=entry.find("id").text
  162.                         chapterCount = entry.find("chapters").text
  163.                         break
  164.                     elif entry.find("synonyms") is not None and localName in entry.find("synonyms").text.lower().replace(":","").replace("(","").replace(")",""):
  165.                         mangaID=entry.find("id").text
  166.                         chapterCount = entry.find("chapters").text
  167.                         break
  168.             except:
  169.                 continue
  170.         if mangaID=="":
  171.             print("No MAL ID found in returned results.")
  172.             continue
  173.         if debug: print("MAL ID = " + mangaID)
  174.  
  175.         xmlData = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
  176.         xmlData += "<entry>\n"
  177.  
  178.         if status == "4" or status == "6":
  179.             xmlData += "\t<chapter></chapter>\n"
  180.         #had to include read here because there's no way to get the # of ch anymore from AP
  181.         elif status == "2":
  182.             xmlData += "\t<chapter>" + chapterCount + "</chapter>\n"
  183.         else:
  184.             xmlData += "\t<chapter>"+ mangaItem.find("div","statusArea").text.replace("ch","").replace("vol","").replace("\t", "").replace("\n", "").replace("\r", "").replace(" ", "") +"</chapter>\n"
  185.         xmlData += "\t<status>" + status +"</status>\n"
  186.         try:
  187.             rating = mangaItem.find("div", attrs={"class": "ttRating"}).text;
  188.             xmlData += "\t<score>" + str(int(float(rating)*2)) + "</score>\n"
  189.         except:
  190.             xmlData += "\t<score></score>\n"
  191.            
  192.         xmlData += "\t<downloaded_chapters></downloaded_chapters>\n"
  193.         xmlData += "\t<storage_type></storage_type>\n"
  194.         xmlData += "\t<storage_value></storage_value>\n"
  195.         xmlData += "\t<times_reread></times_reread>\n"
  196.         xmlData += "\t<reread_value></reread_value>\n"
  197.         xmlData += "\t<date_start></date_start>\n"
  198.         xmlData += "\t<date_finish></date_finish>\n"
  199.         xmlData += "\t<priority></priority>\n"
  200.         xmlData += "\t<enable_discussion></enable_discussion>\n"
  201.         xmlData += "\t<enable_rereading></enable_rereading>\n"
  202.         xmlData += "\t<comments></comments>\n"
  203.         xmlData += "\t<fansub_group></fansub_group>\n"
  204.         xmlData += "\t<tags></tags>\n"
  205.         xmlData += "</entry>\n"
  206.  
  207.         params = {'id' : mangaID, 'data' : xmlData}
  208.         isAdded = False
  209.         try:
  210.             if debug:
  211.                 print("Trying to add manga... ")
  212.             url = urllib.request.Request(apiURLadd % mangaID, urllib.parse.urlencode(params).encode("utf-8"))
  213.             url.add_header("Authorization", "Basic %s" % authString)
  214.             url.add_header("User-Agent",  userAgent)
  215.             urllib.request.urlopen(url)
  216.             isAdded = True
  217.         except:
  218.             isAdded = False
  219.         if not isAdded:
  220.             try:
  221.                 if debug:
  222.                     print("\rTrying to update manga... ")
  223.                 url = urllib.request.Request(apiURLupdate % mangaID, urllib.parse.urlencode(params).encode("utf-8"))
  224.                 url.add_header("Authorization", "Basic %s" % authString)
  225.                 url.add_header("User-Agent",  userAgent)
  226.                 urllib.request.urlopen(url)
  227.                 isAdded = True
  228.             except:
  229.                 isAdded = False
  230.         if debug:
  231.             if isAdded: sys.stdout.write("OK\n")
  232.             else: sys.stdout.write("FAILED\n")
  233.             sys.stdout.flush()
  234.  
  235. print("\nDone")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement