Guest User

ManiaExchange Downloader

a guest
Feb 11th, 2018
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.13 KB | None | 0 0
  1. import xml.etree.ElementTree as etree
  2. import wget
  3. import os
  4. import time
  5. import sys, getopt
  6. import redis
  7. import csv
  8.  
  9. base_path = "/srv/dev-disk-by-label-Intermediates/Downloads/TMX/"
  10.  
  11. exemel = base_path + "search.xml"
  12.  
  13. current_page = 1
  14.  
  15. lost_tracks = []
  16.  
  17. csv_out = base_path + "prev_missing.csv"
  18.  
  19. database = redis.Redis(host='127.0.0.1',port=6379)
  20.  
  21. def help_out():
  22.     print("Proper usage is as follows...")
  23.     print("-h : This message.")
  24.     print("-a : Author search name")
  25.     print("-e : Environment search name")
  26.     print("---ENVIRONMENT VALUES---")
  27.     print("1 : Canyon")
  28.     print("2 : Stadium")
  29.     print("3 : Valley")
  30.     print("4 : Lagoon")
  31.     exit()
  32.  
  33. def arghs(argv):
  34.     try:
  35.         opts, args = getopt.getopt(argv,"hs:a:e:")
  36.     except getopt.GetoptError:
  37.         help_out()
  38.        
  39.     for opt, arg in opts:
  40.         if opt == "-h":
  41.             help_out()
  42.         elif opt == "-a":
  43.             author = arg
  44.             global search_url_base
  45.             search_url_base = "https://tm.mania-exchange.com/tracksearch2/search?api=on&format=xml&author=" + author + "&limit=100"
  46.             TMX().download_search_results()
  47.         elif opt == "-e":
  48.             environment = arg
  49.             global search_url_base
  50.             search_url_base = "https://tm.mania-exchange.com/tracksearch2/search?api=on&format=xml&environments=" + str(environment) + "&limit=100"
  51.             TMX().download_search_results()
  52.         elif opt == "":
  53.             print("Check the help for usage.")
  54.            
  55.  
  56. class TMX(object):
  57.  
  58.     def get_total_tracks(self):
  59.         self.tree = etree.parse(exemel)
  60.         self.root = self.tree.getroot()
  61.         self.total_tracks = int(self.root[1].text)
  62.         self.pages = int(self.total_tracks/100) + 1
  63.  
  64.     def get_searched_tracks(self):
  65.         num_tracks = len(self.root[0])
  66.         for i in range(0,num_tracks):
  67.                 track_id = self.root[0][i][0].text
  68.                 track_author_test = self.root[0][i][2].text
  69.                 track_author = track_author_test
  70.                 track_name_test = self.root[0][i][5].text
  71.                 track_name = track_name_test
  72.                 track_environment = self.root[0][i][16].text
  73.                 updated = self.root[0][i][4].text
  74.            
  75.                 url = "https://tm.mania-exchange.com/tracks/download/" + track_id
  76.                 path = base_path + track_environment + "/" + track_author + "/"
  77.        
  78. #       url = url_test.encode('utf-8')
  79. #       path = path_test.decode('utf-8')
  80.  
  81.                 output = os.path.dirname(path)
  82. #                output = test_output.encode('utf-8')
  83.  
  84.                 if not os.path.exists(output):
  85.                     os.makedirs(output)
  86.                
  87.                 global current_page
  88.                 current_track = ((current_page-1)*100) + i + 1
  89.        
  90.                 print("\n\n\nPage number: %d" % current_page)
  91.                 print("Track number: %d of %d" % (current_track,self.total_tracks))
  92.                 print("Track name: %s" % track_name)
  93.                 print("Track author: %s" % track_author)                
  94.  
  95.                 db_entry = database.get(track_id)
  96.                 if db_entry == updated:
  97.                     print ("Up to date.")
  98.                 else:
  99. #                    wget.download(url,path)
  100.                    ext_command = 'wget --content-disposition -P "' + path.encode('utf-8') + '" ' + url.encode('utf-8')
  101.                    dl_chk = os.system(ext_command)
  102.                    if dl_chk == 0:
  103.                        database.set(track_id,updated)                
  104.                        print ("Success!")
  105.                    else:
  106.                        lost_tracks.append(track_id)
  107.                        print ("Possible lost track.")
  108.                
  109.         if current_page == self.pages:
  110.             print ("\n\nComplete?")
  111.             print ("Potentially lost tracks...")
  112.             print (lost_tracks)
  113.             print ("These will be placed in a file for reference.")
  114.             with open(csv_out, "w") as output:
  115.                 writer = csv.writer(output, lineterminator='\n')
  116.                 for val in lost_tracks:
  117.                     writer.writerow([val])
  118.         else:
  119.             current_page = current_page + 1
  120.             self.download_search_results()
  121.     ## Updated At is index 4 on each track!
  122.  
  123.     def download_search_results(self):
  124.         search_url_current = search_url_base + "&page=" + str(current_page)
  125.         if current_page != 1:
  126.             os.remove(exemel)
  127.         wget.download(search_url_current,exemel)
  128.         try:
  129.             self.tree = etree.parse(exemel)
  130.         except etree.ParseError:
  131.             print ("Failed to parse. Possible server error. Pausing 30 seconds before attempting again.")
  132.             time.sleep(30)
  133.             print ("Attempting again...")
  134.             self.download_search_results()
  135.         else:
  136.             if current_page == 1:
  137.                 self.get_total_tracks()
  138.             self.root = self.tree.getroot()
  139.             self.get_searched_tracks()
  140.  
  141.  
  142. try:
  143.     req_arg = sys.argv[1]
  144. except IndexError:
  145.     help_out()
  146.    
  147. current_page = 1
  148. arghs(sys.argv[1:])
Add Comment
Please, Sign In to add comment