Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- ########################################################################################################################
- # Title: Image Catalogue Exif Data Writing
- # Authors: Matear, L., Duncan, G. (2018) Email: Liam.Matear@jncc.gov.uk
- # Version Control: 1.0
- # Script description: Read in metadata from Proforma Stills Matrices from the RV Scotia Survey 1714S, and
- # batch write all associated metadata to the Solan Bank survey 1714S image catalogue.
- # All images are in JPEG file format.
- #
- # All code within this script runs directly from the image files within the working directory
- # and the metadata recorded within the image stills proforma provided by an external contractor.
- # All data used within this script are copies of the original files.
- #
- # For any enquiries please contact Liam Matear by email: Liam.Matear@jncc.gov.uk
- #
- # Please note:
- # Users must ensure to create a copy of the JPEG files and to work on the copy, the
- # packages within this script will write directly to the original, and any changes made
- # will be permanent.
- ########################################################################################################################
- # Section 1: Loading, manipulating and formatting the data within Python
- ########################################################################################################################
- # 1a) Load in all required packages for script:
- # If required install packages using 'pip install package name command in terminal
- import os
- import re
- import pandas as pd
- import subprocess
- import datetime
- ########################################################################################################################
# 1b) Anchor all relative file access in the survey working-copy directory.
working_dir = 'X:\\OffshoreSurvey\\SurveyData\\2014_10_RVScotia_1714S_SolanBank\\GroundTruthing\\PhotoStation\\Copies_LM'
os.chdir(working_dir)
# Load the 'Stills Form' sheet of the proforma workbook into a DataFrame.
prof_meta = pd.read_excel('20150508 Proforma_Stills analysis FINAL.xlsx', 'Stills Form')
- ########################################################################################################################
# 1c) Clean up Proforma_Stills (stills_meta) - remove all undesired data from proforma
def clean_stills_prof(df):
    """Return a cleaned copy of the stills proforma DataFrame.

    Strips literal newline characters out of string cell values, then drops
    the proforma columns that are not needed for EXIF writing.

    Fixes over the original:
    - ``replace`` is NOT an in-place operation; its result was previously
      discarded, so the newline stripping silently did nothing.  The result
      is now captured.
    - ``drop`` uses ``errors='ignore'`` so sheets that are already (partly)
      cleansed no longer abort the whole cleaning step.
    - The bare ``except:`` is narrowed to ``AttributeError`` (raised when the
      argument is not DataFrame-like).

    Parameters
    ----------
    df : pandas.DataFrame
        The 'Stills Form' sheet read from the proforma workbook.

    Returns
    -------
    pandas.DataFrame or None
        The cleaned frame, or None (with a message printed) when *df* does
        not support DataFrame operations.
    """
    try:
        # replace() returns a new frame; assigning also means the caller's
        # original frame is no longer mutated as a side effect.
        df = df.replace({'\n': ''}, regex=True)
        df.drop(['Habitat Name (Max 100 characters). Substrate & Cover',
                 'Habitat Description (Simple): includes zone, substrate, community, depth, litter, trawl marks, physical damage, biotope fit, imagery quality comments. If problems with ID - why?',
                 'Habitat Description (Full): Additionaly includes details of search features and PMFs',
                 'Date', 'Fix Time (hh:mm:ss)', 'DateTime', 'Fix- Eastings', 'Fix - Northings', 'Fix - Lat',
                 'Fix - Long', 'Depth', 'Field of view (m2)', 'Bedrock', 'Boulders_over1024mm', 'Boulders_512to1024mm ',
                 'Boulders_256to512mm ', 'Cobbles_64to256mm', 'Pebbles_16to64mm', 'Shells_Empty ',
                 'Gravel_Stone_4to16mm', 'Gravel_Shell_4to16mm', 'Sand', 'Sand_Coarse_1to4mm',
                 'Sand_Medium_0_25to1mm ', 'Sand_Fine_0_063to0_25mm', 'Mud_lessthan0_063mm', 'Total %',
                 'Total Sediments', 'Total Rock', 'Evidence of Human Impact', 'Reef Elevation',
                 'Frag Spong Antho Habitat', 'Biotope Changed Following QA', 'OLD MNCR code',
                 'OLD Classification\n(Exact copy of MNCR descriptor)',
                 'Classification\n(Exact copy of MNCR descriptor)', 'Biotope Confidence', '2nd MNCR code',
                 '2nd Classification\n(Exact copy of MNCR descriptor)', '2nd Biotope Confidence',
                 'DeterminedBy', 'Visual quality of sample'], axis=1, inplace=True, errors='ignore')
        return df
    except AttributeError:
        # A non-DataFrame argument (e.g. None) lands here.
        print('Value Error: User must pass df as argument to function.'
              ' If this is true, errors may result because columns do not exist or the data is already cleansed')
# Apply the cleaning routine, then give the surviving columns
# computer-friendly names and order the records by still reference.
stills_meta = clean_stills_prof(prof_meta)
column_names = {
    'Still Sample Ref': 'still_ref',
    'Station code': 'stn_code',
    'Concatenated Search Features and PMFs': 'search_features_PMF',
    'Fix Lat Dec': 'latitude',
    'Fix Long Dec': 'longitude',
    'Annex 1 Reef': 'annex1_reef',
    'PMF Seabed Habitats': 'pmf_seabed_habitats',
    'PMF Mobile Species': 'pmf_mobile_species',
    'PMF Limited Mobility Species': 'pmf_limited_mobility_species',
    'MNCR code': 'MNCR_code',
}
stills_meta.rename(columns=column_names, inplace=True)
stills_meta = stills_meta.sort_values(by=['still_ref'])
########################################################################################################################
# Section 2: Create Configuration file and format all metadata records for looping / writing
########################################################################################################################
# 2a) ExifTool user-defined XMP tag definition (Perl syntax), one list entry
# per output line, mirroring the column names produced in section 1c.
configLines = ["%Image::ExifTool::UserDefined = (",
               " 'Image::ExifTool::XMP::xmp' => {",
               " still_ref => { Name => 'still_ref' },",
               " stn_code => { Name => 'stn_code' },",
               " search_features_PMF => { Name => 'search_features_PMF' },",
               " annex1_reef => { Name => 'annex1_reef' },",
               " pmf_seabed_habitats => { Name => 'pmf_seabed_habitats' },",
               " pmf_mobile_species => { Name => 'pmf_mobile_species' },",
               " pmf_limited_mobility_species => { Name => 'pmf_limited_mobility_species' },",
               " MNCR_code => { Name => 'MNCR_code' }, ",
               " Lattitude => { Name => 'Latitude', WRITABLE => 'rational64s'},",
               " Longitude => { Name => 'Longitude', WRITABLE => 'rational64s'},",
               " },",
               ");"]
# Serialise the definition -- one entry per line, newline-terminated -- and
# persist it beside the script as config.cfg for ExifTool's -config flag.
config_text = "\n".join(configLines) + "\n"
with open('config.cfg', 'w') as configfile:
    configfile.write(config_text)
# 2b) User function to execute ExifTool invocations from Python.
def subprocess_cmd(command):
    """Run *command* (the ExifTool invocation) and return its stdout.

    The command is echoed first so a transcript of every ExifTool call is
    visible in the console.

    Fixes over the original:
    - The child process is now waited on via ``communicate()``; previously it
      was never reaped (leaked process) and its stdout pipe was never drained,
      which can deadlock once ExifTool fills the pipe buffer.
    - ``shell=True`` is only applied when *command* is a single string; an
      argv-style list is executed directly, which is correct cross-platform
      and avoids shell quoting/injection issues.

    Parameters
    ----------
    command : str or list of str
        Shell command line, or an argv-style argument list.

    Returns
    -------
    str
        The child's stdout with surrounding whitespace stripped.
    """
    print(command)
    process = subprocess.Popen(command, stdout=subprocess.PIPE,
                               shell=isinstance(command, str), encoding="utf8")
    # communicate() drains stdout and reaps the child process.
    proc_stdout = process.communicate()[0].strip()
    return proc_stdout
# 2c) Index the cleaned frame by still reference and convert it into a
# nested dictionary: {still_ref: {column_name: value, ...}, ...}.
data_dict = stills_meta.set_index('still_ref').T.to_dict('dict')
directory = 'X:\\OffshoreSurvey\\SurveyData\\2014_10_RVScotia_1714S_SolanBank\\GroundTruthing\\PhotoStation\\Copies_LM'
# NOTE: this is an alias, not a copy -- the restructuring in section 3a is
# visible through data_dict as well.
data_dict2 = data_dict
########################################################################################################################
# Section 3: Loop through all metadata records and directories & write successful regex patterns for each file
########################################################################################################################
# 3a) Restructure each record: nest the metadata fields under "fields" and
# derive the station code, station number and image number from the still
# reference key (underscore-separated, image number in the last component
# prefixed with 'P').
for still_key in data_dict2:
    parts = still_key.split("_")
    data_dict2[still_key] = {
        "fields": data_dict2[still_key],
        "stationcode": parts[0],
        "stationnumber": parts[1],
        # The trailing component carries the image number behind a 'P' prefix.
        "imagenumber": parts[-1].replace("P", ""),
    }
# 3b) Timestamped error log (one per run, created in the current working
# directory) plus the constant leading arguments of every ExifTool call.
time = datetime.datetime.now()
error_log = open(time.strftime('%Y%m%d%H%M') + '_error_log.txt', 'w')
# Base argv: the ExifTool binary plus the user-defined tag configuration.
initialExifToolArgs = [r'D:\Programs\ExifTool\exiftool.exe', '-config', os.path.join(os.getcwd(), 'config.cfg')]
# Namespace prefix used when building per-field -XMP-xmp:field="value" args.
exifToolNamespacePrefix = "-XMP-xmp"
# 3c) Scrub leftovers from earlier runs: ExifTool leaves '.jpg_original'
# backups and '.jpg_exiftool_tmp' temporaries behind, and both must be
# removed before a fresh write pass over the directory tree.
for folder, subdirs, filenames in os.walk(directory):
    for name in filenames:
        extension = os.path.splitext(name)[1]
        if 'jpg_original' in extension or 'jpg_exiftool_tmp' in extension:
            os.remove(os.path.join(folder, name))
# 3d) Walk the working directory (sub-folders included) and, for every JPEG,
# find the spreadsheet record whose station code, station number and image
# number all appear (in that order) in the file name.  Matched files get
# their metadata written via ExifTool; unmatched files go to the error log.
for base, dirnames, files in os.walk(directory):
    for eachfile in files:
        # Only JPEG images are candidates.  The original tested the str()
        # of the whole splitext tuple, which also matched 'jpg' anywhere in
        # the file name; testing the extension itself is the clear intent.
        extension = os.path.splitext(eachfile)[1].lower()
        if extension not in ('.jpg', '.jpeg'):
            continue
        # list() so the matched key can be popped safely inside the loop.
        for key_match in list(data_dict2):
            record = data_dict2[key_match]
            # re.escape guards against regex metacharacters in the codes.
            # Match against the file name itself rather than the repr of
            # the splitext tuple (the original's str(root_ext)).
            regexPattern = re.compile(re.escape(record["stationcode"]) + ".+" +
                                      re.escape(record["stationnumber"]) + ".+" +
                                      re.escape(record["imagenumber"]))
            if regexPattern.search(eachfile):
                print('Regex successfully matched')
                fileExifToolArgs = list(initialExifToolArgs)
                fullPath = os.path.join(base, eachfile)
                # One -XMP-xmp:field="value" argument per metadata field.
                for fieldName, fieldValue in record["fields"].items():
                    fileExifToolArgs.append('{}:{}="{}"'.format(exifToolNamespacePrefix,
                                                                fieldName, fieldValue))
                fileExifToolArgs.append(str(fullPath))
                subprocess_cmd(fileExifToolArgs)
                # Each record matches one file only: drop it and stop scanning.
                data_dict2.pop(key_match, None)
                break
        else:
            # for/else: no spreadsheet record matched this image file, so
            # record the file name (not the tuple repr) in the error log.
            # NOTE(review): the pasted source lost its indentation -- this
            # else is assumed to belong to the key loop; confirm against
            # the original script.
            error_log.write("\n Failed to match spreadsheet record to : " + eachfile)
error_log.close()
Add Comment
Please, Sign In to add comment