#!/usr/bin/env python
"""
Created by Phistrom 2013-05-02

Pulls all picture information from the StromDAM database and uses the hashes,
file locations, and tag information to copy a single file for each hash to
the specified BASE_PICS_DIR. Each file will be placed in a folder
corresponding to its tag information and the tags will be written to the
file.

In the event of a crash (which seems frequent with the exempi library that
the Python-XMP-Toolkit relies on), the script can determine where it stopped
thanks to a text file (TEMPFILE_PATH, relative to the working directory)
containing the hashes that have already been finished.

NOTE: this is a Python 2 script (it relies on ``dict.iteritems``).
"""
import sys

# Debug aid: show the import search path before the project/third-party
# imports below are attempted.
for pathv in sys.path:
    print(pathv)

import binascii
import libxmp
import os
import shutil
import stromberg.dam.db_model as dam_model
from stromberg.utils import logger
#import timing

BASE_PICS_DIR = '/media/onealdata1/Pictures'
DEFAULT_ORGANIZATION = 'Stromberg'
# Maps the numeric tag type used as keys of the tag dictionaries to its name.
TAG_TYPES = {
    1: 'Organizations',
    2: 'Projects',
    3: 'Products',
    4: 'Materials',
    5: 'Applications',
    6: 'Other',
    7: 'Themes',
    8: 'Loc',
}
IPTC_KEYWORDS = 'Iptc.Application2.Keywords'
MS_KEYWORDS = 'Xmp.dc.subject'
CONVERSION_NEEDED = True
# (windows prefix, local mount point) pairs used by convert_filepath().
PATH_CONVERT = (
    (r'\\stromberg-fs1\users', '/media/strombergfs1/users',),
    (r'\\stromberg-fs1\general-documents', '/media/strombergfs1/general'),
)
TEMPFILE_PATH = 'hashes_complete.txt'
XMP_HIERARCHY = 'Xmp.lr.hierarchicalSubject'
XMP_DESCRIPTION = 'Xmp.dc.description'
# Tag types consulted, in this order, to pick a sub-folder for files that
# have no project tag.
ORDER_OF_TAGS_FOR_FOLDERS = [
    3,
    4,
    5,
    7,
    8,
]
DEBUG = True
LOG = logger.get_logger('dedupe_to_folder', DEBUG, 'deduper_logs')


def _collect_tags(tags):
    """Split a tag dictionary into flat keywords and hierarchical tags.

    Every tag whose type is not 'Other' is prefixed with its type name and a
    pipe. A tag containing a pipe is hierarchical: the full string goes into
    the hierarchy set and only its last component goes into the keyword set.

    Returns a ``(keywords, hierarchy)`` tuple of sets.
    """
    keywords = set()
    hierarchy = set()
    for tagtype in tags:
        for tag in tags[tagtype]:
            prefix = ''
            if TAG_TYPES[tagtype] != 'Other':
                prefix = TAG_TYPES[tagtype] + '|'
            tag = prefix + tag.strip()
            LOG.debug("TAG %s", tag)
            if '|' in tag:
                # Hierarchical tags are stored in full in
                # Xmp.lr.hierarchicalSubject and, without the pipes and
                # parent tags, in the regular Xmp.dc.subject.
                hierarchy.add(tag)
                tag = tag.split('|')[-1]
            keywords.add(tag)
    return keywords, hierarchy


def _append_items(xmp, namespace, name, values):
    """Append each of *values* to the ordered XMP array *name*."""
    for value in values:
        xmp.append_array_item(namespace, name, value,
                              {'prop_array_is_ordered': True,
                               'prop_value_is_array': True})


def _close_xmp_file(xmpfile, filepath):
    """Close *xmpfile* if it is open, logging (not raising) an IOError."""
    try:
        if xmpfile.xmpfileptr:
            xmpfile.close_file()
    except IOError as ex:
        LOG.warning('Could not write tags to %s. %s', filepath, ex)


def apply_tags_to_file(tags, filepath, description=''):
    """Given a dictionary of tag lists (where the key is a tag type and the
    value is a list of tags that are of that type), applies those tags to the
    file at the given filepath.

    The existing dc:subject, lr:hierarchicalSubject and dc:description
    properties are replaced. Nothing is touched when there are no tags and
    no description. An IOError while writing or closing the file is logged
    as a warning instead of being raised.
    """
    keywords, hierarchy = _collect_tags(tags)
    LOG.debug("DESCRIPTION %s", description)
    if not (keywords or hierarchy or description):
        return

    dc_ns = libxmp.consts.XMP_NS_DC
    lr_ns = libxmp.consts.XMP_NS_Lightroom
    xmpfile = libxmp.XMPFiles(file_path=filepath, open_forupdate=True)
    try:
        xmp = xmpfile.get_xmp()
        if xmp is None:
            # NOTE(review): get_xmp() appears to return None for files that
            # carry no XMP packet yet; start from an empty one in that case.
            xmp = libxmp.XMPMeta()
        if keywords:
            # remove existing subject tags and replace with the new ones
            xmp.delete_property(dc_ns, 'subject')
            _append_items(xmp, dc_ns, 'subject', sorted(keywords))
        if hierarchy:
            # remove existing hierarchy tags and add the new ones back
            xmp.register_namespace(lr_ns, 'lr')
            xmp.delete_property(lr_ns, 'lr:hierarchicalSubject')
            xmp.set_property(lr_ns, 'lr:hierarchicalSubject', '',
                             prop_array_is_ordered=True,
                             prop_value_is_array=True)
            _append_items(xmp, lr_ns, 'lr:hierarchicalSubject',
                          [node.strip() for node in sorted(hierarchy)])
        xmp.delete_property(dc_ns, 'description')
        _append_items(xmp, dc_ns, 'description', [description])
        try:
            # none of these checks seem to help prevent the Backtrace error
            if xmpfile.can_put_xmp(xmp):
                xmpfile.put_xmp(xmp)
            else:
                LOG.warning('XMP metadata cannot be written to %s.', filepath)
        except IOError as ex:
            # str(ex) is used because ex.message is empty for errno-style
            # IOErrors (and deprecated).
            LOG.warning('Could not write tags to %s. %s', filepath, ex)
    finally:
        # Always release the handle, even when editing the metadata failed.
        _close_xmp_file(xmpfile, filepath)


def convert_filepath(filepath):
    """Translate a Windows share path into the matching local mount path.

    The first PATH_CONVERT prefix that *filepath* starts with is swapped for
    its local counterpart, then every backslash becomes ``os.sep``.
    """
    for key, val in PATH_CONVERT:
        if filepath.startswith(key):
            filepath = filepath.replace(key, val, 1)
            break
    return filepath.replace('\\', os.sep)


def _remove_partial_copy(path):
    """Delete whatever a failed copy left behind at *path*, if anything."""
    if not os.path.exists(path):
        return
    try:
        os.remove(path)
    except OSError as err:
        LOG.warning('Could not remove partial copy %s: %s', path, err)


def copy_to_dest(try_files, dest_folder):
    """Copy the first usable candidate from *try_files* into *dest_folder*.

    Returns the destination path of the copy, or False when a file with the
    candidate's name already exists in *dest_folder*. Raises IOError when no
    candidate could be copied (including when *try_files* is empty).
    """
    try:
        os.makedirs(dest_folder)
    except EnvironmentError as err:
        # An already existing folder is the normal case; anything else is
        # reported here and will surface again when the copies fail.
        if not os.path.isdir(dest_folder):
            LOG.warning('Could not create %s: %s', dest_folder, err)

    for candidate in try_files:
        destination = os.path.join(dest_folder, os.path.basename(candidate))
        # NOTE(review): the check is by file name only, so two different
        # hashes with the same base name and folder are treated as one file.
        if os.path.exists(destination):
            LOG.debug('File has already been copied to %s', destination)
            return False
        try:
            shutil.copyfile(candidate, destination)
        except EnvironmentError as err:
            LOG.debug('Could not copy %s to %s: %s',
                      candidate, destination, err)
            # A truncated file must not stay behind, otherwise the existence
            # check above would report it as already copied.
            _remove_partial_copy(destination)
            continue
        return destination
    raise IOError('Could not copy any candidates to %s.' % dest_folder)


def _first_tag(tags, tagtype):
    """Return the first tag of *tagtype*, or None if there is none."""
    try:
        return tags[tagtype][0]
    except (KeyError, IndexError):
        return None


def get_destination_by_tag(tags):
    """Given a dictionary of tags, determines a folder to put a file into.

    The folder is BASE_PICS_DIR/<organization>/<project>. When there is no
    project tag, the project is '0NoProject' and a further sub-folder is
    taken from the first tag type in ORDER_OF_TAGS_FOR_FOLDERS that has a
    tag ('uncategorized' if none has). Missing or empty tag lists are
    treated alike.
    """
    organization = _first_tag(tags, 1)
    if organization is None:
        organization = DEFAULT_ORGANIZATION
    project = _first_tag(tags, 2)
    if project is not None:
        return os.path.join(BASE_PICS_DIR, organization, project)

    extra_dir = 'uncategorized'
    for tagtype in ORDER_OF_TAGS_FOR_FOLDERS:
        value = _first_tag(tags, tagtype)
        if value is None:
            continue
        if TAG_TYPES[tagtype] == 'Loc':
            # Loc tags are hierarchical; each level becomes a folder.
            value = value.replace('|', os.sep)
        extra_dir = os.path.join(TAG_TYPES[tagtype], value)
        break
    return os.path.join(BASE_PICS_DIR, organization, '0NoProject', extra_dir)


def _load_finished_hashes(path):
    """Read the hashes recorded by a previous run; empty set if unreadable."""
    finished = set()
    try:
        with open(path, 'rb') as done_file:
            for line in done_file:
                finished.add(line.strip())
    except IOError:
        # No progress file simply means there is nothing to resume from.
        pass
    return finished


def main():
    """Copy one file per hash into its tag-derived folder and tag it.

    Hashes listed in TEMPFILE_PATH by an earlier, interrupted run are
    skipped. Each finished hash is appended to that file; the file is
    removed once the whole run has completed. Errors for a single hash are
    logged and do not stop the run.
    """
    already_done = _load_finished_hashes(TEMPFILE_PATH)
    LOG.debug("Getting DAM files")
    all_files = dam_model.get_all_files()
    LOG.debug("Got the DAM files")
    with open(TEMPFILE_PATH, 'a') as progress:
        for hashed, files in all_files.iteritems():
            try:
                hexl = binascii.hexlify(hashed)
                if hexl in already_done:
                    LOG.debug('Hash %s marked as done.', hexl)
                    continue
                orig_files = files
                if CONVERSION_NEEDED:
                    files = [convert_filepath(fil) for fil in files]
                LOG.debug('Getting tags for %s', hexl)
                tags = dam_model.get_tags_by_hash(hexl)
                LOG.debug('Getting the destination based on tags for %s',
                          hexl)
                newdest = get_destination_by_tag(tags)
                LOG.debug('Destination for %s will be %s', hexl, newdest)
                newfile = copy_to_dest(files, newdest)
                if not newfile:
                    continue
                LOG.debug('Successfully copied to %s', newfile)
                # The original locations are kept as the file description.
                apply_tags_to_file(tags, newfile, '|'.join(orig_files))
                progress.write('%s\n' % hexl)
                # Flush right away: a hard crash in the XMP library would
                # otherwise lose the buffered hashes and defeat the resume.
                progress.flush()
                LOG.debug('Successfully tagged %s', newfile)
            except Exception as ex:
                LOG.error('%s occurred. Problem was %s',
                          type(ex).__name__, ex)
    # The whole run finished, so there is nothing left to resume.
    os.remove(TEMPFILE_PATH)


if __name__ == '__main__':
    main()