Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#!/usr/bin/env python
"""
Created by Phistrom
2013-05-02
Pulls all picture information from the StromDAM database and uses the hashes,
file locations, and tag information to copy a single file for each hash to
the specified BASE_PICS_DIR. Each file will be placed in a folder corresponding
to its tag information and the tags will be written to the file.
In the event of a crash (which seems frequent with the exempi library that
the Python-XMP-Toolkit relies on), the script can determine where it stopped
thanks to a text file containing already finished hashes that it writes
in the same directory it's in.
"""
- import sys
# Start-up diagnostic: print every entry of the module search path.
# NOTE(review): presumably left in to debug import problems with the modules
# imported below -- confirm whether it is still wanted.
for pathv in sys.path:
    # The parenthesised single-argument form prints the same text whether
    # `print` is the Python 2 statement or the Python 3 function.
    print(pathv)
- import binascii
- import libxmp
- import os
- import shutil
- import stromberg.dam.db_model as dam_model
- from stromberg.utils import logger
- #import timing
# Root folder beneath which get_destination_by_tag() builds every destination.
BASE_PICS_DIR = '/media/onealdata1/Pictures'

# Organization folder used when a file has no organization tag.
DEFAULT_ORGANIZATION = 'Stromberg'

# Display name for each numeric tag type found as a key in a tag dictionary.
# The names double as hierarchy prefixes and as folder names.
TAG_TYPES = {
    1: 'Organizations',
    2: 'Projects',
    3: 'Products',
    4: 'Materials',
    5: 'Applications',
    6: 'Other',
    7: 'Themes',
    8: 'Loc',
}

# Metadata property names. None of these four is referenced by the code
# visible in this file.
IPTC_KEYWORDS = 'Iptc.Application2.Keywords'
MS_KEYWORDS = 'Xmp.dc.subject'
XMP_HIERARCHY = 'Xmp.lr.hierarchicalSubject'
XMP_DESCRIPTION = 'Xmp.dc.description'

# When True, main() passes every stored path through convert_filepath().
CONVERSION_NEEDED = True

# (stored path prefix, local replacement) pairs consumed by
# convert_filepath(); the first matching prefix wins.
PATH_CONVERT = (
    (r'\\stromberg-fs1\users', '/media/strombergfs1/users'),
    (r'\\stromberg-fs1\general-documents', '/media/strombergfs1/general'),
)

# Progress file: one hex hash per line for every file already finished.
# The path is relative, so it resolves against the working directory.
TEMPFILE_PATH = 'hashes_complete.txt'

# Tag types tried, in this order, to name the sub-folder of a picture that
# has no project tag.
ORDER_OF_TAGS_FOR_FOLDERS = [
    3, 4, 5, 7, 8,
]

# Passed to the logger factory when LOG is created.
DEBUG = True
# Module-wide logger obtained from the project's logger helper; DEBUG is
# forwarded as its second argument.
LOG = logger.get_logger('dedupe_to_folder', DEBUG, 'deduper_logs')
def apply_tags_to_file(tags, filepath, description=''):
    """Write the given tags and description into a file's XMP metadata.

    tags is a dictionary whose keys are tag types (keys of TAG_TYPES) and
    whose values are lists of tag strings of that type. filepath is the file
    to update. description, when non-empty, replaces the dc:description
    property.

    Every tag whose type is not 'Other' is prefixed with its type name and a
    '|'. A tag containing '|' is hierarchical: the full path is stored under
    lr:hierarchicalSubject and only its last segment under dc:subject. The
    existing values of each property that is written are replaced, not merged.

    Returns None. Nothing is opened when there is nothing to write. An
    IOError raised while storing or closing is logged as a warning instead of
    being raised; any other exception propagates after the file is closed.
    """
    keywords = set()
    hierarchy = set()
    for tagtype in tags:
        for tag in tags[tagtype]:
            prefix = ''
            if TAG_TYPES[tagtype] != 'Other':
                # Anything but Other becomes a child of its tag type.
                prefix = TAG_TYPES[tagtype] + '|'
            tag = prefix + tag.strip()
            LOG.debug("TAG %s", tag)
            LOG.debug("DESCRIPTION %s", description)
            if '|' in tag:
                # Hierarchical tags are stored twice: the whole path in the
                # hierarchy and the leaf alone in the flat keyword list.
                hierarchy.add(tag)
                tag = tag.split('|')[-1]
            keywords.add(tag)

    if not (keywords or hierarchy or description):
        return

    # Options shared by every array item appended below.
    array_opts = {'prop_array_is_ordered': True, 'prop_value_is_array': True}
    dc_ns = libxmp.consts.XMP_NS_DC
    lr_ns = libxmp.consts.XMP_NS_Lightroom

    xmpfile = libxmp.XMPFiles(file_path=filepath, open_forupdate=True)
    try:
        xmp = xmpfile.get_xmp()
        if xmp is None:
            # A file without an XMP packet yields None; start from an empty
            # packet instead of failing on the first property call.
            xmp = libxmp.XMPMeta()

        if keywords:
            # Remove existing subject tags and replace with the new ones.
            xmp.delete_property(dc_ns, 'subject')
            for key in sorted(keywords):
                xmp.append_array_item(dc_ns, 'subject', key, array_opts)

        if hierarchy:
            # Remove existing hierarchy tags and add the new ones back.
            xmp.register_namespace(lr_ns, 'lr')
            xmp.delete_property(lr_ns, 'lr:hierarchicalSubject')
            xmp.set_property(lr_ns, 'lr:hierarchicalSubject', '',
                             prop_array_is_ordered=True,
                             prop_value_is_array=True)
            for node in sorted(hierarchy):
                xmp.append_array_item(lr_ns, 'lr:hierarchicalSubject',
                                      node.strip(), array_opts)

        if description:
            # Only touch the description when there is one to store, so an
            # existing description is never replaced by an empty item.
            xmp.delete_property(dc_ns, 'description')
            xmp.append_array_item(dc_ns, 'description', description,
                                  array_opts)

        try:
            if xmpfile.can_put_xmp(xmp):
                xmpfile.put_xmp(xmp)
            else:
                LOG.warning('XMP cannot be written to %s; tags not applied.',
                            filepath)
        except IOError as ex:
            LOG.warning('Could not write tags to %s. %s', filepath, ex)
    finally:
        # Always release the handle, even when editing the metadata failed.
        try:
            if xmpfile.xmpfileptr:
                xmpfile.close_file()
        except IOError as ex:
            LOG.warning('Could not write tags to %s. %s', filepath, ex)
def convert_filepath(filepath, path_map=None):
    """Translate a stored Windows path into a path usable on this machine.

    filepath is the path to translate. path_map is an optional sequence of
    (prefix, replacement) pairs; when omitted, PATH_CONVERT is used.

    The first prefix that filepath starts with is swapped for its
    replacement, then every remaining backslash becomes os.sep. The prefix
    comparison ignores case, because Windows share names are
    case-insensitive. A path matching no prefix only has its separators
    converted.

    Returns the translated path string.
    """
    if path_map is None:
        path_map = PATH_CONVERT
    for prefix, replacement in path_map:
        # Compare only the leading slice so the rest of the path keeps its
        # original spelling.
        if filepath[:len(prefix)].lower() == prefix.lower():
            filepath = replacement + filepath[len(prefix):]
            break
    return filepath.replace('\\', os.sep)
def copy_to_dest(try_files, dest_folder):
    """Copy the first usable candidate file into dest_folder.

    try_files is a sequence of candidate source paths, tried in order.
    dest_folder is created (with parents) if it does not exist yet.

    Returns the destination path of the copy. Returns False without copying
    when a file with a candidate's name already exists in dest_folder.

    Raises IOError when dest_folder cannot be created or when no candidate
    could be copied (including an empty try_files).
    """
    try:
        os.makedirs(dest_folder)
    except (IOError, OSError):
        # Failing because the folder is already there is the normal case;
        # anything else means no copy can succeed, so report it now.
        if not os.path.isdir(dest_folder):
            raise IOError('Could not create destination folder %s.'
                          % dest_folder)

    for fil in try_files:
        destination = os.path.join(dest_folder, os.path.basename(fil))
        # NOTE(review): this is a name check only -- a different picture that
        # happens to share the file name is also reported as already copied.
        if os.path.exists(destination):
            LOG.debug('File has already been copied to %s', destination)
            return False
        try:
            shutil.copyfile(fil, destination)
        except (IOError, OSError, shutil.Error):
            # The destination did not exist before this attempt, so whatever
            # is there now is a partial copy. Remove it, otherwise the next
            # candidate (or the next run) would see it as already copied.
            try:
                os.remove(destination)
            except OSError:
                # Nothing was created, e.g. the source could not be opened.
                pass
            continue
        return destination

    raise IOError('Could not copy any candidates to %s.' % dest_folder)
def _first_tag(tags, tagtype):
    """Return the first tag of the given type, or None if there is none."""
    values = tags.get(tagtype)
    return values[0] if values else None


def get_destination_by_tag(tags, base_dir=None):
    """Given a dictionary of tags, determine the folder to put a file into.

    tags maps tag types (keys of TAG_TYPES) to lists of tag strings; only the
    first tag of a type is used. base_dir is the root of the result and
    defaults to BASE_PICS_DIR.

    The folder is <base_dir>/<organization>/<project>. Without an
    organization tag DEFAULT_ORGANIZATION is used. Without a project tag the
    project folder is '0NoProject' and one more level is appended: the name
    and first tag of the first type in ORDER_OF_TAGS_FOR_FOLDERS that has a
    tag, or 'uncategorized' when none has. A 'Loc' tag has its '|'
    separators turned into nested folders.

    A tag type that is absent and one whose list is empty are treated alike.

    Returns the destination folder path; nothing is created on disk.
    """
    if base_dir is None:
        base_dir = BASE_PICS_DIR

    # Tag types 1 and 2 are 'Organizations' and 'Projects' in TAG_TYPES.
    organization = _first_tag(tags, 1)
    if organization is None:
        organization = DEFAULT_ORGANIZATION

    project = _first_tag(tags, 2)
    if project is not None:
        return os.path.join(base_dir, organization, project)

    for tagtype in ORDER_OF_TAGS_FOR_FOLDERS:
        value = _first_tag(tags, tagtype)
        if value is None:
            continue
        if TAG_TYPES[tagtype] == 'Loc':
            value = value.replace('|', os.sep)
        extra_dir = os.path.join(TAG_TYPES[tagtype], value)
        break
    else:
        extra_dir = 'uncategorized'
    return os.path.join(base_dir, organization, '0NoProject', extra_dir)
def _load_finished_hashes(path):
    """Return the set of hex hashes listed in the progress file at path."""
    finished = set()
    try:
        with open(path, 'rb') as progress:
            for line in progress:
                finished.add(line.strip())
    except IOError:
        # No readable progress file: treat this as a fresh run.
        pass
    return finished


def main():
    """Copy one file per hash into its tag-based folder and tag the copy.

    Hashes listed in TEMPFILE_PATH are skipped. Each hash that is copied and
    tagged is appended to that file and flushed at once, so the record
    survives an abrupt crash. A failure on one hash is logged and the run
    moves on to the next. When the loop completes, the progress file is
    removed.
    """
    already_done = _load_finished_hashes(TEMPFILE_PATH)
    LOG.debug("Getting DAM files")
    all_files = dam_model.get_all_files()
    LOG.debug("Got the DAM files")

    # The with-block closes the progress file even on an interrupted run.
    with open(TEMPFILE_PATH, 'a') as progress:
        for hashed, files in all_files.iteritems():
            try:
                hexl = binascii.hexlify(hashed)
                if hexl in already_done:
                    LOG.debug('Hash %s marked as done.', hexl)
                    continue
                orig_files = files
                if CONVERSION_NEEDED:
                    files = [convert_filepath(fil) for fil in files]
                LOG.debug('Getting tags for %s', hexl)
                tags = dam_model.get_tags_by_hash(hexl)
                LOG.debug('Getting the destination based on tags for %s',
                          hexl)
                newdest = get_destination_by_tag(tags)
                LOG.debug('Destination for %s will be %s', hexl, newdest)
                newfile = copy_to_dest(files, newdest)
                if not newfile:
                    # NOTE(review): a file copied by a run that crashed while
                    # tagging lands here and is never tagged -- confirm this
                    # is intended.
                    continue
                LOG.debug('Successfully copied to %s', newfile)
                # The original locations are kept as the description.
                apply_tags_to_file(tags, newfile, '|'.join(orig_files))
                progress.write('%s\n' % hexl)
                # Push the line out of the process buffer immediately; a hard
                # crash would otherwise discard the hashes recorded so far.
                progress.flush()
                LOG.debug('Successfully tagged %s', newfile)
            except Exception as ex:
                # Top-level boundary: log the failure and keep going.
                LOG.error('%s occurred. Problem was %s',
                          type(ex).__name__, ex)

    # Reached only when the loop ran to completion.
    os.remove(TEMPFILE_PATH)
# Run the de-duplication only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement