# ManiaExchange Downloader

import csv
import getopt
import os
import subprocess
import sys
import time
import xml.etree.ElementTree as etree

try:
    from urllib.parse import quote
except ImportError:  # Python 2 fallback
    from urllib import quote

import redis
import wget

# --- Filesystem locations -------------------------------------------------
# Root directory that receives the downloaded tracks and bookkeeping files.
base_path = "/srv/dev-disk-by-label-Intermediates/Downloads/TMX/"

# Scratch file holding the most recently downloaded page of search results.
exemel = os.path.join(base_path, "search.xml")

# CSV report that lists the IDs collected in lost_tracks.
csv_out = os.path.join(base_path, "prev_missing.csv")

# --- Mutable run state ----------------------------------------------------
# 1-based number of the search-result page currently being processed.
current_page = 1

# IDs of tracks whose download command reported a failure.
lost_tracks = list()

# --- Persistence ----------------------------------------------------------
# Redis connection used to remember, per track ID, the "updated" value of
# the copy that was last downloaded successfully.
database = redis.Redis(host="127.0.0.1", port=6379)

def help_out():
    """Print the command-line usage summary and terminate the program.

    Raises:
        SystemExit: always, once the usage text has been printed.
    """
    usage_lines = (
        "Proper usage is as follows...",
        "-h : This message.",
        "-a : Author search name",
        "-e : Environment search name",
        "---ENVIRONMENT VALUES---",
        "1 : Canyon",
        "2 : Stadium",
        "3 : Valley",
        "4 : Lagoon",
    )
    for line in usage_lines:
        print(line)

    # sys.exit() instead of the bare exit() helper: exit() is injected by the
    # site module for interactive sessions and is not guaranteed to exist
    # (e.g. under "python -S"), whereas sys.exit() is always available.
    sys.exit()

def arghs(argv):
    """Parse the command-line options and run the requested search(es).

    Recognised options:
        -h        print the usage text and exit
        -a NAME   search by author name
        -e VALUE  search by environment value

    "-s" is still accepted by the option parser (so existing invocations do
    not start failing) but no action is attached to it.

    Args:
        argv: the argument list without the program name (sys.argv[1:]).

    Side effects:
        Sets the module-level search_url_base and resets current_page to 1
        before every search, then runs the search through TMX.
    """
    # Declared once, up front: repeating a "global" statement after the name
    # has already been assigned is a SyntaxError on Python 3.
    global search_url_base, current_page

    try:
        opts, _unused_args = getopt.getopt(argv, "hs:a:e:")
    except getopt.GetoptError:
        help_out()
        return  # help_out() exits; this only guards against using unset opts

    search_endpoint = (
        "https://tm.mania-exchange.com/tracksearch2/search?api=on&format=xml"
    )
    # Query-string parameter each search option maps to.
    search_params = {"-a": "author", "-e": "environments"}

    searched = False
    for opt, arg in opts:
        if opt == "-h":
            help_out()
        elif opt in search_params:
            # Percent-encode the value so spaces, '&', '#', ... cannot break
            # or alter the query string.
            search_url_base = (
                search_endpoint
                + "&" + search_params[opt] + "=" + quote(str(arg), safe="")
                + "&limit=100"
            )
            # Every search starts on its first page, even when an earlier
            # search in the same invocation advanced the shared page counter.
            current_page = 1
            TMX().download_search_results()
            searched = True

    if not searched:
        # Nothing actionable was supplied (only positional arguments or the
        # unused -s option).
        print("Check the help for usage.")


class TMX(object):
    """Walk a ManiaExchange track search page by page and fetch new tracks.

    The class works on module-level state: it reads search_url_base, exemel,
    base_path, csv_out and database, advances current_page and appends to
    lost_tracks.

    The search XML is addressed purely by position, exactly as the code
    reads it: root[0] holds the tracks of the page and root[1] the total
    number of matches; within a track, child 0 is the ID, 2 the author,
    4 the "updated at" value, 5 the name and 16 the environment.
    """

    # Results per page; has to match the "limit" value in search_url_base.
    PAGE_SIZE = 100
    # Seconds to wait before re-requesting a page that failed to parse.
    RETRY_DELAY = 30

    def get_total_tracks(self):
        """Parse the saved search page and record track and page totals.

        Sets self.tree, self.root, self.total_tracks and self.pages.
        """
        self.tree = etree.parse(exemel)
        self.root = self.tree.getroot()
        self._read_totals()

    def get_searched_tracks(self):
        """Process the already loaded page, then continue with later pages.

        Expects self.root, self.total_tracks and self.pages to be set.
        """
        if self._process_current_page():
            self.download_search_results()

    def download_search_results(self):
        """Download and process result pages from current_page to the end.

        Pages are handled in a loop rather than by mutual recursion, so the
        number of pages is not limited by the interpreter's recursion depth.
        After the last page the IDs in lost_tracks are written to csv_out.
        """
        while True:
            self._fetch_current_page()
            # Totals are taken from the first page. They are also read when
            # this instance has none yet (search resumed on a later page);
            # NOTE(review): that assumes every page carries the total in
            # root[1] - confirm against the API.
            if current_page == 1 or not hasattr(self, "pages"):
                self._read_totals()
            if not self._process_current_page():
                break

    def _read_totals(self):
        """Derive total_tracks and pages from the parsed page in self.root."""
        self.total_tracks = int(self.root[1].text)
        # Ceiling division: an exact multiple of PAGE_SIZE must not add an
        # empty trailing page. At least one page so an empty search still
        # reaches the "complete" branch.
        full_pages = (self.total_tracks + self.PAGE_SIZE - 1) // self.PAGE_SIZE
        self.pages = max(1, full_pages)

    def _fetch_current_page(self):
        """Download and parse the current page, retrying until it parses."""
        search_url_current = search_url_base + "&page=" + str(current_page)
        while True:
            # Remove any earlier copy first (previous page, previous run or
            # a failed attempt): wget.download() does not overwrite an
            # existing file but saves under a numbered name, which would
            # leave the stale file to be parsed again.
            if os.path.exists(exemel):
                os.remove(exemel)
            wget.download(search_url_current, exemel)
            try:
                self.tree = etree.parse(exemel)
            except etree.ParseError:
                print("Failed to parse. Possible server error. "
                      "Pausing %d seconds before attempting again."
                      % self.RETRY_DELAY)
                time.sleep(self.RETRY_DELAY)
                print("Attempting again...")
            else:
                self.root = self.tree.getroot()
                return

    def _process_current_page(self):
        """Sync every track of the loaded page.

        Returns:
            True when another page remains (current_page was advanced),
            False when this was the last page (the report was written).
        """
        global current_page

        first_number = (current_page - 1) * self.PAGE_SIZE + 1
        for offset, track in enumerate(self.root[0]):
            self._sync_track(track, first_number + offset)

        if current_page >= self.pages:
            self._write_lost_tracks_report()
            return False

        current_page += 1
        return True

    def _sync_track(self, track, track_number):
        """Download one track unless the stored "updated" value matches."""
        track_id = track[0].text
        track_author = track[2].text
        updated = track[4].text
        track_name = track[5].text
        track_environment = track[16].text

        url = "https://tm.mania-exchange.com/tracks/download/" + track_id
        # NOTE(review): environment and author come straight from the server
        # response and are used as directory names without sanitising.
        path = base_path + track_environment + "/" + track_author + "/"

        target_dir = os.path.dirname(path)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)

        print("\n\n\nPage number: %d" % current_page)
        print("Track number: %d of %d" % (track_number, self.total_tracks))
        print("Track name: %s" % track_name)
        print("Track author: %s" % track_author)

        stored = database.get(track_id)
        # Redis replies with bytes unless the client decodes responses;
        # decode so the comparison with the XML text can actually match.
        if isinstance(stored, bytes):
            stored = stored.decode("utf-8")
        if stored == updated:
            print("Up to date.")
            return

        # Argument list instead of a shell string: server-supplied names can
        # contain quotes or shell metacharacters.
        command = ["wget", "--content-disposition", "-P", path, url]
        try:
            exit_status = subprocess.call(command)
        except OSError:
            # wget could not be started at all; count it as a failed download
            # like a non-zero exit status.
            exit_status = -1

        if exit_status == 0:
            database.set(track_id, updated)
            print("Success!")
        else:
            lost_tracks.append(track_id)
            print("Possible lost track.")

    def _write_lost_tracks_report(self):
        """Print the failed track IDs and write them to csv_out, one per row."""
        print("\n\nComplete?")
        print("Potentially lost tracks...")
        print(lost_tracks)
        print("These will be placed in a file for reference.")
        with open(csv_out, "w") as output:
            writer = csv.writer(output, lineterminator='\n')
            writer.writerows([track_id] for track_id in lost_tracks)


# Script entry: without at least one command-line argument there is nothing
# to search for, so show the usage text (which terminates the program).
if len(sys.argv) > 1:
    req_arg = sys.argv[1]
else:
    help_out()

# Start from the first result page, then hand the arguments to the parser.
current_page = 1
arghs(sys.argv[1:])