Guest User

Untitled

a guest
Jun 20th, 2018
144
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.52 KB | None | 0 0
  1. ########################################################################################################################
  2.  
  3. # Title: Image Catalogue Exif Data Writing
  4.  
  5. # Authors: Matear, L., Duncan, G. (2018) Email: Liam.Matear@jncc.gov.uk
  6. # Version Control: 1.0
  7.  
  8. # Script description: Read in metadata from Proforma Stills Matrices from the RV Scotia Survey 1714S, and
  9. # batch write all associated metadata to the Solan Bank survey 1714S image catalogue.
  10. # All images are in JPEG file format.
  11. #
  12. # All code within this script runs directly from the image files within the working directory
  13. # and the metadata recorded within the image stills proforma provided by an external contractor.
  14. # All data used within this script are copies of the original files.
  15. #
  16. # For any enquiries please contact Liam Matear by email: Liam.Matear@jncc.gov.uk
  17. #
  18. # Please note:
  19. # Users must ensure to create a copy of the JPEG files and to work on the copy, the
  20. # packages within this script will write directly to the original, and any changes made
  21. # will be permanent.
  22.  
  23. ########################################################################################################################
  24.  
  25. # Section 1: Loading, manipulating and formatting the data within Python
  26.  
  27. ########################################################################################################################
  28.  
  29.  
  30. # 1a) Load in all required packages for script:
  31. # If required, install packages using the 'pip install package_name' command in a terminal
  32.  
  33. import os
  34. import re
  35. import pandas as pd
  36. import subprocess
  37. import datetime
  38.  
  39. ########################################################################################################################
  40.  
  41. # 1b) Setting a working directory for file access
  42.  
  43. os.chdir('X:\\OffshoreSurvey\\SurveyData\\2014_10_RVScotia_1714S_SolanBank\\GroundTruthing\\PhotoStation\\Copies_LM')
  44.  
  45.  
  46. # Read in metadata from .xlsx format files as Pandas (pd) DataFrames
  47.  
  48. prof_meta = pd.read_excel('20150508 Proforma_Stills analysis FINAL.xlsx', 'Stills Form')
  49.  
  50. ########################################################################################################################
  51.  
  52.  
  53. # 1c) Clean up Proforma_Stills (stills_meta) - remove all undesired data from proforma
  54.  
  55.  
  56.  
  57. def clean_stills_prof(df):
  58. """Pass stills proforma as df to function to remove unwanted columns"""
  59. try:
  60. df.replace({'\n': ''}, regex=True)
  61. df.drop(['Habitat Name (Max 100 characters). Substrate & Cover',
  62. 'Habitat Description (Simple): includes zone, substrate, community, depth, litter, trawl marks, physical damage, biotope fit, imagery quality comments. If problems with ID - why?',
  63. 'Habitat Description (Full): Additionaly includes details of search features and PMFs',
  64. 'Date', 'Fix Time (hh:mm:ss)', 'DateTime', 'Fix- Eastings', 'Fix - Northings', 'Fix - Lat',
  65. 'Fix - Long', 'Depth', 'Field of view (m2)', 'Bedrock', 'Boulders_over1024mm', 'Boulders_512to1024mm ',
  66. 'Boulders_256to512mm ', 'Cobbles_64to256mm', 'Pebbles_16to64mm', 'Shells_Empty ',
  67. 'Gravel_Stone_4to16mm', 'Gravel_Shell_4to16mm', 'Sand', 'Sand_Coarse_1to4mm',
  68. 'Sand_Medium_0_25to1mm ', 'Sand_Fine_0_063to0_25mm', 'Mud_lessthan0_063mm', 'Total %',
  69. 'Total Sediments', 'Total Rock', 'Evidence of Human Impact', 'Reef Elevation',
  70. 'Frag Spong Antho Habitat', 'Biotope Changed Following QA', 'OLD MNCR code',
  71. 'OLD Classification\n(Exact copy of MNCR descriptor)',
  72. 'Classification\n(Exact copy of MNCR descriptor)', 'Biotope Confidence', '2nd MNCR code',
  73. '2nd Classification\n(Exact copy of MNCR descriptor)', '2nd Biotope Confidence',
  74. 'DeterminedBy', 'Visual quality of sample'], axis=1, inplace=True)
  75. return df
  76. except:
  77. print('Value Error: User must pass df as argument to function.'
  78. ' If this is true, errors may result because columns do not exist or the data is already cleansed')
  79.  
  80.  
  81. stills_meta = clean_stills_prof(prof_meta)
  82.  
  83.  
  84. # Rename remaining columns to computer friendly format
  85.  
  86. stills_meta.rename(columns={'Still Sample Ref': 'still_ref', 'Station code': 'stn_code',
  87. 'Concatenated Search Features and PMFs': 'search_features_PMF', 'Fix Lat Dec': 'latitude',
  88. 'Fix Long Dec': 'longitude', 'Annex 1 Reef': 'annex1_reef',
  89. 'PMF Seabed Habitats': 'pmf_seabed_habitats', 'PMF Mobile Species': 'pmf_mobile_species',
  90. 'PMF Limited Mobility Species': 'pmf_limited_mobility_species', 'MNCR code': 'MNCR_code'},
  91. inplace=True)
  92.  
  93. stills_meta = stills_meta.sort_values(by=['still_ref'])
  94.  
  95.  
  96. ########################################################################################################################
  97.  
  98. # Section 2: Create Configuration file and format all metadata records for looping / writing
  99.  
  100. ########################################################################################################################
  101.  
  102. # 2a) Create Config (.cfg) file from the column names created in the Pandas data frame (see section 1C)
  103.  
  104.  
  105. configLines = ["%Image::ExifTool::UserDefined = (",
  106. " 'Image::ExifTool::XMP::xmp' => {",
  107. " still_ref => { Name => 'still_ref' },",
  108. " stn_code => { Name => 'stn_code' },",
  109. " search_features_PMF => { Name => 'search_features_PMF' },",
  110. " annex1_reef => { Name => 'annex1_reef' },",
  111. " pmf_seabed_habitats => { Name => 'pmf_seabed_habitats' },",
  112. " pmf_mobile_species => { Name => 'pmf_mobile_species' },",
  113. " pmf_limited_mobility_species => { Name => 'pmf_limited_mobility_species' },",
  114. " MNCR_code => { Name => 'MNCR_code' }, ",
  115. " Lattitude => { Name => 'Latitude', WRITABLE => 'rational64s'},",
  116. " Longitude => { Name => 'Longitude', WRITABLE => 'rational64s'},",
  117. " },",
  118. ");"]
  119.  
  120. # Create writeable .cfg file and save in the newly formatted configLines list of strings to the config file
  121. # This section uses a lambda function to allow for a new line break ('\n') to be applied to all entries to the
  122. # config file.
  123.  
  124. with open('config.cfg', 'w') as configfile:
  125. configfile.writelines(map(lambda s: s + '\n', configLines))
  126.  
  127. # 2b) Define user function to execute command line from Python through the use of sub-processing commands.
  128. # This function uses Popen to perform multiple simultaneous executions and passes the output into the
  129. # next subprocess using a pipe.
  130.  
  131.  
  132. def subprocess_cmd(command):
  133. """Execute sub-processing commands to the command line tool ExifTool.
  134. This enables the action to be called from the config file and executed by the command line via python.
  135. This function uses Popen to simultaneously execute multiple executions and passes the outputs into subsequent
  136. sub-processes through a pipe"""
  137. print(command)
  138. process = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True, encoding="utf8")
  139. # proc_stdout = process.communicate()[0].strip()
  140. # return proc_stdout
  141.  
  142.  
  143. # 2c) Convert cleansed pd DataFrame to dictionary to store all benthic image metadata fields
  144.  
  145.  
  146. data_dict = stills_meta.set_index('still_ref').T.to_dict('dict')
  147. directory = 'X:\\OffshoreSurvey\\SurveyData\\2014_10_RVScotia_1714S_SolanBank\\GroundTruthing\\PhotoStation\\Copies_LM'
  148. data_dict2 = data_dict
  149.  
  150. ########################################################################################################################
  151.  
  152. # Section 3: Loop through all metadata records and directories & write successful regex patterns for each file
  153.  
  154. ########################################################################################################################
  155.  
  156.  
  157. # 3a) Loop through metadata dictionary and extract station code, station number and image number data.
  158.  
  159.  
  160. for key in data_dict2:
  161. fieldList = data_dict2[key]
  162. data_dict2[key] = {}
  163. data_dict2[key]["fields"] = fieldList
  164. dictSplit = key.split("_")
  165. stationCode = dictSplit[0]
  166. stationNumber = dictSplit[1]
  167. imageNumber = dictSplit[-1]
  168. imageNumber = imageNumber.replace("P", "")
  169. data_dict2[key]["stationcode"] = stationCode
  170. data_dict2[key]["stationnumber"] = stationNumber
  171. data_dict2[key]["imagenumber"] = imageNumber
  172. # print(key)
  173.  
  174. # 3b) Define object-oriented variables required for sub-processing and create an error log saved within the current
  175. # working directory to record all incomplete files. The formatting of this document is set in line 178 using {}
  176. # as a placeholder for the year/month/days/hours/minutes.
  177.  
  178. time = datetime.datetime.now()
  179. error_log = open('{}_error_log.txt'.format(time.strftime('%Y%m%d%H%M')), 'w')
  180. initialExifToolArgs = [r'D:\Programs\ExifTool\exiftool.exe', '-config', os.path.join(os.getcwd(), 'config.cfg')]
  181. exifToolNamespacePrefix = "-XMP-xmp"
  182.  
  183. # 3c) Scrub existing iterations for bad files / remove all temporary files created by ExifTool.
  184. # Loop through all files in the listed directory and for each file - remove files with 'jpg_original' and / or
  185. # 'jpg_exiftool_tmp' file extensions.
  186.  
  187. for base, dirnames, files in os.walk(directory):
  188. for eachfile in files:
  189. root_ext = os.path.splitext(eachfile)[1]
  190. if ('jpg_original' in root_ext) or ('jpg_exiftool_tmp' in root_ext):
  191. os.remove(os.path.join(base, eachfile))
  192.  
  193.  
  194. # 3d) Loop all files within the set working directory (sub-folders included) and search for all files which match
  195. # the stationCode, stationNumber and imageNumber values pulled out in section 3a.
  196. # For all .jpg files which successfully match as regex patterns, execute the command line tool via the predefined
  197. # sub-process function.
  198.  
  199. for base, dirnames, files in os.walk(directory):
  200. for eachfile in files:
  201. # print(eachfile)
  202.  
  203. # Use os.fsencode to encode file data to bytes / correct format to be used in piexif
  204. filename = eachfile
  205.  
  206. # Split file into root and extension
  207. root_ext = os.path.splitext(filename)
  208. str_ext = str(root_ext)
  209.  
  210. # Loop through string file + extensions and check that file extensions are correct (jpg / jpeg)
  211. if ('jpg' in str_ext.lower()) or ('jpeg' in str_ext.lower()):
  212. # print(str(root_ext) + 'jpeg successfully found')
  213. # print('jpeg successfully found')
  214. # Loop through dictionary keys and match using conditional statement / regular expressions (re)
  215. for key_match in data_dict2:
  216. # Split dictionary into 3 new variables
  217. stationCode = data_dict2[key_match]["stationcode"]
  218. stationNumber = data_dict2[key_match]["stationnumber"]
  219. imageNumber = data_dict2[key_match]["imagenumber"]
  220. regexPattern = re.compile(stationCode + ".+" + stationNumber + ".+" + imageNumber)
  221. # print(str(regexPattern) + 'successful pattern made')
  222. # print(str_ext)
  223.  
  224. if re.search(regexPattern, str_ext):
  225. print('Regex successfully matched')
  226.  
  227. fileExifToolArgs = list(initialExifToolArgs)
  228. # Join loop components together again using os.path.join()
  229. fullPath = os.path.join(base, filename)
  230. for fieldName in data_dict2[key_match]["fields"]:
  231. fieldArg = '{}:{}="{}"'.format(exifToolNamespacePrefix, fieldName,
  232. data_dict2[key_match]["fields"][fieldName])
  233. fileExifToolArgs.append(fieldArg)
  234.  
  235. fileExifToolArgs.append(str(fullPath))
  236. subprocess_cmd(fileExifToolArgs)
  237. # Perform insert of EXIF data to matched .jpg files - need to include the use of ExifTool
  238. # Via the use of command line / sub-processing
  239.  
  240. # Remove key - to cleanup
  241. dump = data_dict2.pop(key_match, None)
  242.  
  243. # Exit key loop (we've found a match)
  244. break
  245. else:
  246. error_log.write("\n Failed to match spreadsheet record to : " + str_ext)
  247.  
  248. error_log.close()
Add Comment
Please, Sign In to add comment