Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/python
- """
- This script recursively synchronizes a music collection one way, from a source directory in a lossless format to a target directory in a lossy format.
- For details, see README.md.
- This software is licensed under the GNU GPLv3 and is provided as is, with ABSOLUTELY NO WARRANTY.
- """
- import os
- import logging
- from optparse import OptionParser
- import multiprocessing
- import time
- import click
# Command-line interface.
# NOTE: this code runs at import time: it parses sys.argv and terminates the
# process through parser.error() whenever the input is invalid.
parser = OptionParser(usage="usage: %prog [options] source_dir target_dir")
parser.add_option(
    "-l", "--loglevel", dest="loglevel", default="INFO",
    help="Set's the log level (ie. the amount of output) possible values: DEBUG, INFO, WARNING, ERROR, CRITICAL")
parser.add_option(
    "-d", "--delete", action="store_true", dest="delete", default=False,
    help="Remove files in the target directory that don't have a corresponding file in the source directory.")
parser.add_option(
    "-f", "--format", dest="format", default="mp3", type="choice", choices=["ogg", "mp3"],
    help="Set the target format to mp3 or ogg (default is mp3)")
parser.add_option(
    "-s", "--followlinks", action="store_true", dest="followlinks", default=False,
    help="By default the script will not walk down into symbolic links that resolve to directories. Set followlinks to visit directories pointed to by symlinks, on systems that support them.")
parser.add_option(
    "-w", "--target_win", action="store_true", dest="win",
    help="Convert the filenames to Windows convention (for example if you copy to a FAT Partition)")
parser.add_option(
    "-r", "--resample", action="store_true", dest="resample", default=False,
    help="Resample to 44.1KHz when converting")
parser.add_option(
    "-m", "--multiprocess", action="store_true", dest="multiprocess", default=False,
    help="Use multiprocessing to convert the files (default is false)")
parser.add_option(
    "-p", "--nproc", dest="nproc", default=multiprocessing.cpu_count(), type="int",
    help="Set the number of processes used to convert the files, ignored if multiprocess option is not active (default is the number of detected CPUs)")
(options, args) = parser.parse_args()

# "!=" instead of the Python-2-only "<>" operator.
if len(args) != 2:
    parser.error("incorrect number of arguments")

# Translate the symbolic level name into the numeric constant exported by the
# logging module.  The name is upper-cased so that "-l debug" is accepted too;
# anything that is not an integer attribute of logging is rejected.
try:
    level = int(getattr(logging, options.loglevel.upper()))
except (AttributeError, ValueError, TypeError):
    parser.error("incorrect loglevel value '%s'" % options.loglevel)

for path in args:
    if not os.path.exists(path):
        parser.error("path does not exist: %s" % path)

# Compare the resolved locations: a plain string comparison is defeated by a
# trailing slash ("music" vs "music/") or by a symbolic link.
if os.path.realpath(args[0]) == os.path.realpath(args[1]):
    parser.error("source and target path must not be the same")

# Characters replaced by "-" in target file names when -w is given.
# The backslash is written as "\\" because "\=" is not a valid escape
# sequence; the resulting string is unchanged.
if options.win:
    illegal_characters = '?[],\\=+<>:;"*|^'
else:
    illegal_characters = ''

# optparse already restricts --format to the declared choices, so no further
# validation is needed here.
target_format = options.format

if options.multiprocess and options.nproc < 1:
    parser.error("the number of processes must be superior to 0")

logging.basicConfig(level=level)

# Trailing separators are stripped so that paths produced by os.walk() always
# start with "<dir><separator>", whichever way the directories were typed; the
# "or os.sep" keeps a bare root directory intact.
source_dir = args[0].rstrip(os.sep) or os.sep
target_dir = args[1].rstrip(os.sep) or os.sep
# Canonical tag name -> every FLAC tag name (lower case) accepted for it.
# Most tags only answer to their own name; the two "total" counters also
# accept an alternative spelling.
flac_tags_synonyms = {
    "title": ["title"],
    "tracknumber": ["tracknumber"],
    "genre": ["genre"],
    "date": ["date"],
    "artist": ["artist"],
    "album": ["album"],
    "albumartist": ["albumartist"],
    "discnumber": ["discnumber"],
    "totaldiscs": ["totaldiscs", "disctotal"],
    "totaltracks": ["totaltracks", "tracktotal"],
    "composer": ["composer"],
}
def sha1OfFile(filepath):
    """Return the hexadecimal SHA-1 digest of the file at ``filepath``.

    The file is hashed chunk by chunk so that a large audio file is never
    held in memory in one piece.

    Raises IOError/OSError when the file cannot be opened or read.
    """
    import hashlib

    digest = hashlib.sha1()
    with open(filepath, 'rb') as f:
        # iter() with a sentinel calls f.read() until it returns b'' (EOF).
        for chunk in iter(lambda: f.read(1024 * 1024), b''):
            digest.update(chunk)
    return digest.hexdigest()
def create_ID3V2_tag_values_from_flac(source):
    """Read the tags of the FLAC file ``source`` as shell-quoted values.

    ``metaflac --export-tags-to=-`` is run on ``source`` and every
    ``NAME=value`` line of its output is mapped onto the canonical tag names
    of ``flac_tags_synonyms`` (tag names are matched case-insensitively).

    Returns a dict with one entry per key of ``flac_tags_synonyms``.  Every
    value has been passed through shellquote(), so it can be inserted into a
    shell command line as is.  Tags that are absent from the file are the
    quoted empty string, except ``tracknumber`` which defaults to "0".
    Repeated ``artist`` / ``composer`` tags are joined with "/".
    """
    id3_tags_dict = dict.fromkeys(flac_tags_synonyms, "")
    id3_tags_dict['tracknumber'] = "0"

    # Reverse index: accepted FLAC tag name -> canonical tag name.
    canonical_names = {}
    for reference_key, reference_values in flac_tags_synonyms.items():
        for synonym in reference_values:
            canonical_names.setdefault(synonym, reference_key)

    # The pipe is closed explicitly instead of being left to the garbage
    # collector.
    pipe = os.popen("metaflac --export-tags-to=- %s" % shellquote(source))
    try:
        id3_tags = pipe.read().split("\n")
    finally:
        pipe.close()
    logging.debug("id3: %s" % id3_tags)

    for id3 in id3_tags:
        if not id3:
            continue
        if "=" not in id3:
            # Not a NAME=value line.
            logging.warning("id3 tag: '%s' ignored." % id3)
            continue
        tag, value = id3.split("=", 1)
        reference_tag = canonical_names.get(tag.lower())
        if reference_tag is None:
            logging.info("unsupported id3 tag '%s' ignored" % id3)
            continue
        if reference_tag in ("composer", "artist") and id3_tags_dict[reference_tag] != "":
            # tag value is a list, mp3 id3 v2 separator is a /
            id3_tags_dict[reference_tag] = id3_tags_dict[reference_tag] + "/" + value
        else:
            id3_tags_dict[reference_tag] = value

    for key in list(id3_tags_dict):
        id3_tags_dict[key] = shellquote(id3_tags_dict[key])
    return id3_tags_dict
def flac_to_mp3(source, target):
    """Encode the FLAC file ``source`` into the MP3 file ``target``.

    The file is decoded with ``flac -cd`` and piped into ``lame`` (256 kbit/s,
    plus resampling to 44.1 kHz when the resample option is set).  The tags
    returned by create_ID3V2_tag_values_from_flac() are written as ID3v2
    tags, and the SHA-1 of the source file is stored in the comment tag.

    Nothing is returned; a failing command is reported through the log.
    """
    cmd_dict = create_ID3V2_tag_values_from_flac(source)
    cmd_dict['flac_to_mp3_source_flac'] = shellquote(source)
    cmd_dict['flac_to_mp3_target_mp3'] = shellquote(target)
    cmd_dict['flac_to_mp3_enc_opts'] = "-b 256"
    # A hexadecimal digest contains no shell metacharacters, so it is used
    # without quoting.
    cmd_dict['sha1_of_source'] = sha1OfFile(source)
    if options.resample:
        cmd_dict['flac_to_mp3_enc_opts'] += " --resample 44.1"
    cmdstr = (
        "flac -cd %(flac_to_mp3_source_flac)s | lame %(flac_to_mp3_enc_opts)s -h --add-id3v2 "
        "--tt %(title)s "
        "--tn %(tracknumber)s/%(totaltracks)s "
        "--tg %(genre)s "
        "--ty %(date)s "
        "--tc %(sha1_of_source)s "
        "--ta %(artist)s "
        "--tl %(album)s "
        "--tv TPE2=%(albumartist)s "
        "--tv TPOS=%(discnumber)s/%(totaldiscs)s "
        "--tv TCOM=%(composer)s "
        "- %(flac_to_mp3_target_mp3)s"
    ) % cmd_dict
    logging.debug(cmdstr)
    # The status used to be discarded, which hid failed conversions.  For a
    # pipeline the shell reports the status of its last command (lame).
    status = os.system(cmdstr)
    if status != 0:
        logging.error("conversion of %s to %s failed (status %s)", source, target, status)
def get_extension(filename):
    """Return the lower-cased extension of ``filename``, leading dot included.

    The empty string is returned when the name has no extension.
    """
    _stem, extension = os.path.splitext(filename)
    return extension.lower()
def is_newer(source, target):
    """Tell whether ``source`` was modified at the same time as or after ``target``.

    Both paths must exist; otherwise the OSError raised while reading the
    modification time propagates to the caller.
    """
    return os.stat(source).st_mtime >= os.stat(target).st_mtime
def is_valid_shasum(source, target):
    """Check that the comment tag of ``target`` holds the SHA-1 of ``source``.

    flac_to_mp3() passes the SHA-1 of the source file to lame's ``--tc``
    option; this function recomputes that digest and compares it with the
    comment read back from ``target``.

    Returns True when both values are equal, and False when they differ or
    when the comment cannot be read.
    """
    logging.debug("checking checksum of %s" % source)
    shasum = sha1OfFile(source)
    # NOTE(review): eyeD3 is not imported in the visible part of this file, so
    # this call raises NameError unless the module is imported elsewhere.
    # The Tag()/link()/getComment() calls are presumably the legacy eyeD3
    # API -- TODO confirm.
    t = eyeD3.Tag()
    t.link(target)
    try:
        comment = t.getComment()
    except Exception:
        # "except Exception" rather than a bare "except", so that
        # KeyboardInterrupt and SystemExit are no longer swallowed.
        logging.error("Could not read comment tag of %s", target)
        return False
    return (shasum == comment)
def x_to_ogg(source, target):
    """Encode ``source`` into the Ogg file ``target`` with ``oggenc``.

    Nothing is returned; a failing command is reported through the log.
    """
    # The original author's note gives this quality as 256 kbit/s.
    # NOTE(review): confirm the bitrate against the oggenc documentation.
    oggencopts = "-q10"
    if options.resample:
        oggencopts += " --resample 44100"
    # This automatically copies tags
    cmdstr = "oggenc %s -Q -o %s %s" % (oggencopts, shellquote(target), shellquote(source))
    logging.debug(cmdstr)
    # The status used to be discarded, which hid failed conversions.
    status = os.system(cmdstr)
    if status != 0:
        logging.error("conversion of %s to %s failed (status %s)", source, target, status)
def cp(source, target):
    """Copy the contents of the file ``source`` to the file ``target``.

    The copy is done with shutil.copyfile() instead of spawning a shell that
    runs ``cp``.  Only the data is copied: ``target`` gets the default
    permissions of a newly created file rather than those of ``source``.

    Nothing is returned.  The copy stays best effort: a failure is logged and
    does not abort the synchronization.
    """
    import shutil

    try:
        shutil.copyfile(source, target)
    except EnvironmentError as err:
        # Covers IOError/OSError as well as shutil.Error (e.g. same file).
        logging.error("could not copy %s to %s: %s", source, target, err)
def wav_to_mp3(source, target):
    """Encode the WAV file ``source`` into the MP3 file ``target`` with ``lame``.

    Nothing is returned; a failing command is reported through the log.
    """
    # TODO does it copy tags too?
    lame_opts = ""
    if options.resample:
        lame_opts += " --resample 44.1"
    cmdstr = "lame %s -h %s %s" % (lame_opts, shellquote(source), shellquote(target))
    # The status used to be discarded, which hid failed conversions.
    status = os.system(cmdstr)
    if status != 0:
        logging.error("conversion of %s to %s failed (status %s)", source, target, status)
# Lower-cased source extension -> [target extension, converter].
# A converter is called as converter(source_path, target_path).
# The entries below are copied as they are, whatever the target format.
convert_map = {
    ".ogg": [".ogg", cp],
    ".mp3": [".mp3", cp],
    ".keep": [".keep", cp],
    ".skip": [".skip", cp],
    ".jpg": [".jpg", cp],
    ".jpeg": [".jpg", cp],
}

# The lossless formats are converted to the format selected on the command
# line.
if target_format == "ogg":
    convert_map[".flac"] = [".ogg", x_to_ogg]
    convert_map[".wav"] = [".ogg", x_to_ogg]
elif target_format == 'mp3':
    convert_map[".flac"] = [".mp3", flac_to_mp3]
    convert_map[".wav"] = [".mp3", wav_to_mp3]
def shellquote(s):
    """Return ``s`` wrapped in single quotes for use as one shell word.

    Every single quote inside ``s`` is emitted as the sequence '\\'' (close
    the quoted string, escaped quote, reopen the quoted string).
    """
    pieces = s.split("'")
    return "'" + "'\\''".join(pieces) + "'"
def clean(target_fname):
    """Remove ``target_fname`` when it has no counterpart in the source tree.

    ``target_fname`` is a path located below ``target_dir``.

    * A directory is removed when the source tree has no directory at the
      same relative path.  A directory that cannot be removed (for instance
      because it is not empty) is reported and left in place.
    * A file is removed when the source tree has no file with the same
      relative name and any of the extensions listed in ``convert_map``.

    NOTE(review): the lookup uses the target name as it is.  A target whose
    name was altered by convert() (illegal characters replaced), or whose
    source extension is not lower case, therefore looks orphaned -- confirm
    whether that is acceptable.
    """
    # The path is rebuilt from its position relative to target_dir.  The
    # previous str.replace() also rewrote later occurrences of target_dir
    # inside the path.
    source_fname = os.path.join(source_dir, os.path.relpath(target_fname, target_dir))

    if os.path.isdir(target_fname):
        if not os.path.isdir(source_fname):
            logging.info("removing target directory %s" % target_fname)
            try:
                os.rmdir(target_fname)
            except OSError as err:
                logging.warning("could not remove directory %s: %s", target_fname, err)
        return

    extless_fname = os.path.splitext(source_fname)[0]
    # Iterating the dict directly works on Python 2 and 3 (iterkeys() does
    # not exist on Python 3).
    for ext in convert_map:
        test_fname = "%s%s" % (extless_fname, ext)
        if os.path.isfile(test_fname):
            logging.debug("clean: source %s exists" % test_fname)
            return

    # source file not found, get rid of target file
    logging.info("removing target file %s" % target_fname)
    os.unlink(target_fname)
def _target_path_for(source_fname):
    """Return the path in the target tree that corresponds to ``source_fname``."""
    # Only the position relative to source_dir is carried over.  The previous
    # str.replace() also rewrote later occurrences of source_dir in the path.
    relative_fname = os.path.relpath(source_fname, source_dir)
    # replace 'illegal characters' with dashes.  Only the part below
    # target_dir is altered: target_dir itself is an existing directory
    # chosen by the user.
    for c in illegal_characters:
        relative_fname = relative_fname.replace(c, "-")
    return os.path.join(target_dir, relative_fname)


def convert(source_fname):
    """Bring the target counterpart of the source path ``source_fname`` up to date.

    * A directory is created in the target tree when it is missing.
    * A file (or symbolic link) whose lower-cased extension is listed in
      ``convert_map`` is converted or copied when its target does not exist
      yet, or when is_newer() reports that the source is at least as recent
      as the target.
    * Anything else is logged and ignored.

    The target path is sanitized before directories are created, so that
    directories and the files stored in them use the same names when illegal
    characters are replaced.

    Nothing is returned.
    """
    target_fname = _target_path_for(source_fname)

    if os.path.isdir(source_fname):
        if not os.path.isdir(target_fname):
            logging.debug("creating directory %s" % target_fname)
            try:
                os.mkdir(target_fname)
            except OSError as err:
                logging.error("could not create directory %s: %s", target_fname, err)
        return

    if not (os.path.isfile(source_fname) or os.path.islink(source_fname)):
        logging.error("File type not supported.")
        logging.debug("Source '%s' ignored." % source_fname)
        return

    # determine the type of conversion from the extension
    ext = get_extension(source_fname)
    try:
        target_ext, cmd = convert_map[ext]
    except KeyError:
        logging.warning("File extension '%s' not supported." % (ext))
        logging.debug("Source '%s' ignored." % source_fname)
        return

    # replace the extension of the target filename with the one found in the convert_map
    target_fname = os.path.splitext(target_fname)[0] + target_ext
    logging.debug("cmd: %s, source: %s, target: %s" % (cmd, source_fname, target_fname))

    if not os.path.exists(target_fname):
        # create target
        logging.debug("Target %s not found, converting source file" % target_fname)
        cmd(source_fname, target_fname)
    elif is_newer(source_fname, target_fname):
        # source is newer, create new target file
        logging.debug("Target '%s' exists, but Source is newer" % target_fname)
        cmd(source_fname, target_fname)
    else:
        # nothing to do
        logging.debug("Target '%s' already exists" % target_fname)
def do_single_process(file_list):
    """Run convert() on every path of ``file_list``, one after the other.

    The paths are iterated through a click progress bar.
    """
    with click.progressbar(file_list) as progress:
        for source_path in progress:
            convert(source_path)
def do_multi_process(file_list):
    """Run convert() on every path of ``file_list`` in a pool of processes.

    The pool holds number_of_processes() workers.  Control-c stops the
    conversion; the pool is terminated in every case before returning.
    """
    pool = None
    try:
        logging.debug("Creating the process pool")
        pool = multiprocessing.Pool(number_of_processes())
        pending = pool.map_async(convert, file_list)
        # Specify a timeout in order to receive control-c signal
        pending.get(0x0FFFFF)
    except KeyboardInterrupt:
        logging.error("Control-c pressed, conversion terminated")
    finally:
        logging.debug("Ensuring the processes are stopped")
        if pool is not None:
            pool.terminate()
        logging.debug("Processes stopped")
def number_of_processes():
    """Return the number of worker processes to use.

    This is the nproc option when the multiprocess option is set, 1 otherwise.
    """
    return options.nproc if options.multiprocess else 1
def log_elapsed_time(start, end):
    """Log, at INFO level, the time elapsed between ``start`` and ``end``.

    Both arguments are timestamps in seconds.  The duration is written as
    HH:MM:SS, fractions of a second being dropped.
    """
    # "// 1" floors the duration, exactly like the successive divmod() calls
    # on the raw value did.
    whole_seconds = int((end - start) // 1)
    nb_hours, leftover = divmod(whole_seconds, 3600)
    nb_mins, nb_secs = divmod(leftover, 60)
    logging.info("Music collection synchronization performed in %02d:%02d:%02d" % (nb_hours, nb_mins, nb_secs))
if __name__ == '__main__':
    # Script entry point: synchronize target_dir with source_dir, optionally
    # remove orphaned targets, then run collectiongain on the target tree.
    start = time.time()

    dir_list = []
    file_list = []
    for (path, dirs, files) in os.walk(source_dir, followlinks=options.followlinks):
        dir_list.extend(os.path.join(path, dir_name) for dir_name in dirs)
        file_list.extend(os.path.join(path, file_name) for file_name in files)

    # The directories are handled first to make sure they all exist when the music files are generated
    logging.info("Starting directory synchronization")
    do_single_process(dir_list)

    if options.multiprocess and options.nproc > 1:
        logging.info("Starting file synchronization with %d processes" % number_of_processes())
        do_multi_process(file_list)
    else:
        logging.info("Starting file synchronization with 1 process")
        do_single_process(file_list)

    if options.delete:
        # and now scan target dir for files
        for (path, dirs, files) in os.walk(target_dir):
            for file_name in files:
                clean(os.path.join(path, file_name))
        # and for directories.  The tree is walked bottom-up so that a stale
        # directory is emptied of its stale sub-directories before its own
        # removal is attempted; a top-down walk needed one run per nesting
        # level.
        for (path, dirs, files) in os.walk(target_dir, topdown=False):
            for dir_name in dirs:
                clean(os.path.join(path, dir_name))

    # NOTE(review): the indentation of the original was lost; this call is
    # assumed to run whether or not the delete option is set -- confirm.
    status = os.system("collectiongain -f %s" % shellquote(target_dir))
    if status != 0:
        logging.warning("collectiongain failed on %s (status %s)", target_dir, status)

    end = time.time()
    log_elapsed_time(start, end)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement