Advertisement
Guest User

dedupe_to_folder.py

a guest
May 5th, 2013
124
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 8.01 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. """
  4. Created by Phistrom
  5. 2013-05-02
  6.  
  7. Pulls all picture information from the StromDAM database and uses the hashes,
  8. file locations, and tag information to copy a single file for each hash to
  9. the specified BASE_PICS_DIR. Each file will be placed in a folder corresponding
  10. to its tag information and the tags will be written to the file.
  11.  
  12. In the event of a crash (which seems frequent with the Exempi library that
  13. the Python-XMP-Toolkit relies on), the script can determine where it stopped
  14. at thanks to a text file containing already finished hashes that it writes
  15. in the same directory it's in.
  16. """
  17.  
  18. import sys
  19. for pathv in sys.path:
  20.     print pathv
  21.  
  22.  
  23. import binascii
  24. import libxmp
  25. import os
  26. import shutil
  27. import stromberg.dam.db_model as dam_model
  28. from stromberg.utils import logger
  29. #import timing
  30.  
  31. BASE_PICS_DIR = '/media/onealdata1/Pictures'
  32. DEFAULT_ORGANIZATION = 'Stromberg'
  33. TAG_TYPES = {
  34.     1: 'Organizations',
  35.     2: 'Projects',
  36.     3: 'Products',
  37.     4: 'Materials',
  38.     5: 'Applications',
  39.     6: 'Other',
  40.     7: 'Themes',
  41.     8: 'Loc',
  42. }
  43.  
  44. IPTC_KEYWORDS = 'Iptc.Application2.Keywords'
  45. MS_KEYWORDS = 'Xmp.dc.subject'
  46. CONVERSION_NEEDED = True
  47. PATH_CONVERT = (
  48.     (r'\\stromberg-fs1\users', '/media/strombergfs1/users',),
  49.     (r'\\stromberg-fs1\general-documents', '/media/strombergfs1/general'),
  50. )
  51. TEMPFILE_PATH = 'hashes_complete.txt'
  52. XMP_HIERARCHY = 'Xmp.lr.hierarchicalSubject'
  53. XMP_DESCRIPTION = 'Xmp.dc.description'
  54.  
  55. ORDER_OF_TAGS_FOR_FOLDERS = [
  56.     3, 4, 5, 7, 8,
  57. ]
  58.  
  59. DEBUG = True
  60. LOG = logger.get_logger('dedupe_to_folder', DEBUG, 'deduper_logs')
  61.  
  62.  
  63. def apply_tags_to_file(tags, filepath, description=''):
  64.     """Given a dictionary of tag lists (where the key is a tag type and the
  65.    value is a list of tags that are of that type), applies those tags to the
  66.    file at the given filepath."""
  67.     keywords = set()
  68.     hierarchy = set()
  69.  
  70.     for tagtype in tags:
  71.         for tag in tags[tagtype]:
  72.             prefix = ''
  73.             if TAG_TYPES[tagtype] != 'Other':
  74.                 #if anything but Other,
  75.                 prefix = TAG_TYPES[tagtype] + '|'
  76.             tag = prefix + tag.strip()
  77.             LOG.debug("TAG %s", tag)
  78.             LOG.debug("DESCRIPTION %s", description)
  79.             if '|' in tag:
  80.                 #if a tag has a | character, it is a hierarchical tag and must
  81.                 #be stored both in the Xmp.lr.hierarchicalSubject as well as
  82.                 #the regular Xmp.dc.subject without the pipes and parent tags
  83.                 hierarchy.add(tag)
  84.                 tag = tag.split('|')[-1]
  85.             keywords.add(tag)
  86.  
  87.     if not (keywords or hierarchy or description):
  88.         return
  89.     xmpfile = libxmp.XMPFiles(file_path=filepath, open_forupdate=True)
  90.     xmp = xmpfile.get_xmp()
  91.     if keywords:
  92.         #remove existing subject tags and replace with new ones specified
  93.         xmp.delete_property(libxmp.consts.XMP_NS_DC, 'subject')
  94.         for key in sorted(keywords):
  95.             xmp.append_array_item(libxmp.consts.XMP_NS_DC, 'subject', key,
  96.                                   {'prop_array_is_ordered': True, 'prop_value_is_array': True})
  97.     if hierarchy:
  98.         #remove existing hierarchy tags and add the new ones back
  99.         xmp.register_namespace(libxmp.consts.XMP_NS_Lightroom, 'lr')
  100.         xmp.delete_property(libxmp.consts.XMP_NS_Lightroom, 'lr:hierarchicalSubject')
  101.         xmp.set_property(libxmp.consts.XMP_NS_Lightroom, 'lr:hierarchicalSubject', '',
  102.                          prop_array_is_ordered=True, prop_value_is_array=True)
  103.         for node in sorted(hierarchy):
  104.             xmp.append_array_item(libxmp.consts.XMP_NS_Lightroom, 'lr:hierarchicalSubject', node.strip(),
  105.                                   {'prop_array_is_ordered': True, 'prop_value_is_array': True})
  106.     xmp.delete_property(libxmp.consts.XMP_NS_DC, 'description')
  107.     xmp.append_array_item(libxmp.consts.XMP_NS_DC, 'description', description,
  108.                           {'prop_array_is_ordered': True, 'prop_value_is_array': True})
  109.     try:
  110.         #none of these checks seem to help prevent the Backtrace error
  111.         if xmpfile.can_put_xmp(xmp):
  112.             xmpfile.put_xmp(xmp)
  113.         if xmpfile.xmpfileptr:
  114.             xmpfile.close_file()
  115.     except IOError as ex:
  116.         LOG.warning('Could not write tags to %s. %s', filepath, ex.message)
  117.  
  118.  
  119. def convert_filepath(filepath):
  120.     for key, val in PATH_CONVERT:
  121.         if filepath.startswith(key):
  122.             filepath = filepath.replace(key, val, 1)
  123.             break
  124.     filepath = filepath.replace('\\', os.sep)
  125.     return filepath
  126.  
  127.  
  128. def copy_to_dest(try_files, dest_folder):
  129.     destination = None
  130.  
  131.     try:
  132.         os.makedirs(dest_folder)
  133.     except (IOError, OSError):
  134.         pass
  135.  
  136.     for fil in try_files:
  137.         try:
  138.             filename = os.path.basename(fil)
  139.             destination = os.path.join(dest_folder, filename)
  140.             if os.path.exists(destination):
  141.                 LOG.debug('File has already been copied to %s', destination)
  142.                 return False
  143.             shutil.copyfile(fil, destination)
  144.             break
  145.         except:
  146.             continue
  147.     else:
  148.         raise IOError('Could not copy any candidates to %s.' % dest_folder)
  149.     #print 'dat destination %s' % destination
  150.     return destination
  151.  
  152.  
  153. def get_destination_by_tag(tags):
  154.     """Given a dictionary of tags, determines a folder to put a file into.
  155.    Folder will be determined first by organization, then by project, then by
  156.    product, then by material, then by other"""
  157.  
  158.     try:
  159.         organization = tags[1][0]
  160.     except KeyError:
  161.         organization = DEFAULT_ORGANIZATION
  162.  
  163.     try:
  164.         project = tags[2][0]
  165.         no_project = False
  166.     except KeyError:
  167.         project = '0NoProject'
  168.         no_project = True
  169.  
  170.     dest_dir = os.path.join(BASE_PICS_DIR, organization, project)
  171.  
  172.     if no_project:
  173.         for tagtype in ORDER_OF_TAGS_FOR_FOLDERS:
  174.             if tagtype in tags:
  175.                 value = tags[tagtype][0]
  176.                 if TAG_TYPES[tagtype] == 'Loc':
  177.                     value = value.replace('|', os.sep)
  178.                 extra_dir = os.path.join(TAG_TYPES[tagtype], value)
  179.                 break
  180.         else:
  181.             extra_dir = 'uncategorized'
  182.         dest_dir = os.path.join(dest_dir, extra_dir)
  183.  
  184.     return dest_dir
  185.  
  186.  
  187. def main():
  188.     already_done = set()
  189.     try:
  190.         with open(TEMPFILE_PATH, 'rb') as tempfile:
  191.             for line in tempfile:
  192.                 already_done.add(line.strip())
  193.     except IOError:
  194.         pass
  195.     LOG.debug("Getting DAM files")
  196.     all_files = dam_model.get_all_files()
  197.     LOG.debug("Got the DAM files")
  198.     tempfile = open(TEMPFILE_PATH, 'a')
  199.     #x = 0
  200.     for hashed, files in all_files.iteritems():
  201.         try:
  202.             hexl = binascii.hexlify(hashed)
  203.             if hexl in already_done:
  204.                 LOG.debug('Hash %s marked as done.', hexl)
  205.                 continue
  206.             orig_files = files
  207.             if CONVERSION_NEEDED:
  208.                 files = [convert_filepath(fil) for fil in files]
  209.             LOG.debug('Getting tags for %s', hexl)
  210.             tags = dam_model.get_tags_by_hash(hexl)
  211.             LOG.debug('Getting the destination based on tags for %s', hexl)
  212.             newdest = get_destination_by_tag(tags)
  213.             LOG.debug('Destination for %s will be %s', hexl, newdest)
  214.             newfile = copy_to_dest(files, newdest)
  215.             if not newfile:
  216.                 continue
  217.             LOG.debug('Successfully copied to %s', newfile)
  218.             apply_tags_to_file(tags, newfile, '|'.join(orig_files))
  219.             tempfile.write('%s\n' % hexl)
  220.             LOG.debug('Successfully tagged %s', newfile)
  221.         except Exception as ex:
  222.             LOG.error('%s occurred. Problem was %s %s',
  223.                         type(ex).__name__, ex.message, ex)
  224.     tempfile.close()
  225.     os.remove(TEMPFILE_PATH)
  226.  
  227.         #print
  228.             #x += 1
  229.             #if x >= 100:
  230.             #    break
  231.  
  232.  
  233. if __name__ == '__main__':
  234.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement