Guest User

Untitled

a guest
Jul 16th, 2018
128
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.99 KB | None | 0 0
  1. #!/usr/bin/env python
  2. # this is gn_iso19139_to_mef_example.py
  3.  
  4. """
  5. Example script to create GeoNetwork's Metadata Exchange Format 1.1 (MEF) archive from ISO 19139 metadata XML files
  6.  
  7. MEF files are ZIP archives with the following structure:
  8. +-<uuid>.mef zip archive with the metadata's UUID (must be valid) as the file name
  9. | -info.xml GeoNetwork (GN) specific metadata such as privileges, related data & thumbnail image files, etc.
  10. | -metadata.xml ISO 19139 metadata record
  11. | +public directory with public thumbnail and data files - can be empty
  12. | +private directory with private (GN authentication required) data files such as shape files etc. - can be empty
  13.  
  14. Usage: make sure to edit mef_siteId, info_xml, etc.
  15.  
  16. Python 2.6
  17. Wolfgang Grunberg
  18. Arizona Geological Survey
  19. 11/06/2009
  20. """
  21.  
  22. # Library Imports - not all may be needed
  23. import os
  24. import sys
  25. import cProfile
  26. import shutil
  27. import zipfile
  28. import mimetypes
  29. from xml.dom import minidom
  30. from xml.dom import Node
  31. from time import strftime
  32.  
# Module metadata: conventional dunder attributes describing who wrote and
# maintains this script and how mature it is. They are not read anywhere
# else in this script.
__author__ = "Wolfgang Grunberg"
__copyright__ = "Copyright 2009, Arizona Geological Survey"
__credits__ = ["Wolfgang Grunberg", "the Internets"]
__license__ = "GPL"
__version__ = "1.0.0"
__maintainer__ = "Wolfgang Grunberg"
__email__ = "wgrunberg@azgs.az.gov"
__status__ = "Prototype" # one of "Prototype", "Development", or "Production"
  43.  
  44.  
  45. # some settings
  46.  
  47. # ISO metadata folder path
  48. iso19139_xml_path = "C:\\tmp\\xml_test\\gn_iso19139\\"
  49. # MEF metadata folder path
  50. mef_path = "C:\\tmp\\xml_test\\gn_mef\\"
  51.  
  52. # temporary work location
  53. tmp_path = "C:\\tmp\\" # Temporary workspace to place MEF content before zipping up
  54. mef_dir_name = "temp_mef\\" # Temporary directory that is created and deleted
  55. # temporary file/folder structure
  56. metadata_xml_file = tmp_path+mef_dir_name+"metadata.xml"
  57. info_xml_file = tmp_path+mef_dir_name+"info.xml"
  58. private_dir = tmp_path+mef_dir_name+"private"
  59. public_dir = tmp_path+mef_dir_name+"public"
  60.  
  61.  
  62. def createMef():
  63. """
  64. Extract ISO 19139 metadata and create MEF archive with necessary files and folders
  65. """
  66.  
  67. print "***** START Create MEF "+strftime("%Y-%m-%d %H:%M:%S")+" *****"
  68. uuid = "missing" # metadata record UUID dummy. NOTE: this must be a valid UUID!
  69. create_date = strftime("%Y-%m-%d %H:%M:%S") # dummy metadata creation date
  70. mef_siteId = "00000000-0000-0000-0000-000000000000" # metadata creator UUID. NOTE: this must be a valid UUID!
  71.  
  72. # get list of file names from ISO directory
  73. try:
  74. dir = os.listdir(iso19139_xml_path)
  75. #print dir #debug
  76. except os.error:
  77. print " EXCEPTION: ISO 19139 metadata directory does not exits ("+iso19139_xml_path+") "
  78. return
  79.  
  80. # go through each ISO 19139 metadata file
  81. for file_name in dir:
  82. #print file_name #debug
  83. iso_source_file = iso19139_xml_path+file_name # path to metadata file
  84.  
  85. # retrieve UUID from <gmd:fileIdentifier><gco:CharacterString>. NOTE: this must be a valid UUID!
  86. with open(iso_source_file, 'r') as f:
  87. iso_metadata = f.read()
  88. print " Reading metadata: "+file_name
  89. #print iso_metadata #debug
  90. # Load ISO metadata string into XML object
  91. xmldoc = minidom.parseString(iso_metadata)
  92. #print xmldoc.toxml() #debug
  93. uuid = xmldoc.getElementsByTagName('gmd:fileIdentifier')[0].getElementsByTagName('gco:CharacterString')[0].firstChild.data
  94. #print uuid #debug
  95. create_date = xmldoc.getElementsByTagName('gmd:dateStamp')[0].getElementsByTagName('gco:DateTime')[0].firstChild.data
  96. #print create_date #debug
  97. xmldoc.unlink() # cleanup DOM for improved performance
  98. f.close()
  99. True
  100.  
  101. # if the MEF file doesn't exist, create it
  102. if os.path.exists(mef_path+uuid+'.mef') == False:
  103. # create temporary directory
  104. try:
  105. os.mkdir(tmp_path+mef_dir_name)
  106. except OSError, err:
  107. print >>sys.stderr, " EXCEPTION: ", err
  108. return
  109. # create tmp/public/ and tmp/private/ directories
  110. try:
  111. os.mkdir(private_dir)
  112. except OSError, err:
  113. print >>sys.stderr, " EXCEPTION: ", err
  114. return
  115. try:
  116. os.mkdir(public_dir)
  117. except OSError, err:
  118. print >>sys.stderr, " EXCEPTION: ", err
  119. return
  120.  
  121. # copy iso metadata to <temporary location>/metadata.xml
  122. shutil.copy2(iso_source_file, metadata_xml_file)
  123.  
  124. # construct info.xml file
  125. info_xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
  126. info_xml += "<info version=\"1.0\"><general>"
  127. info_xml +="<uuid>"+uuid+"</uuid>" # universally unique identifier assigned to the metadata and must be a valid UUID. This element is optional and, when omitted, the reader should generate one
  128. info_xml +="<createDate>"+create_date+"</createDate>" # when the metadata was created
  129. info_xml +="<changeDate>"+strftime("%Y-%m-%d %H:%M:%S")+"</changeDate>" # most recent change to the metadata.
  130. info_xml +="<siteId>"+mef_siteId+"</siteId>" # This is an UUID that identifies the actor that created the metadata and must be a valid UUID. When the UUID element is missing, this element should be missing too. If present, it will be ignored.
  131. info_xml +="<siteName>Arizona Geological Survey</siteName>" # Site Name
  132. info_xml +="<schema>iso19139</schema>" # dublin-core, fgdc-std, iso19115, iso19139
  133. info_xml +="<format>full</format>" # MEF format: simple, partial, full
  134. info_xml +="<localId>"+uuid+"</localId>" # OPTIONAL If present, indicates the id used locally by the sourceId actor to store the metadata. Its purpose is just to allow the reuse of the same local id when reimporting a metadata.
  135. info_xml +="<isTemplate>false</isTemplate>" # A boolean field that indicates if this metadata is a template used to create new ones. There is no real distinction between a real metadata and a template but some actors use it to allow fast metadata creation.
  136. info_xml +="<rating>0</rating>" # If present, indicates the users' rating of the metadata ranging from 1 (a bad rating) to 5 (an excellent rating). The special value 0 means that the metadata has not been rated yet. Can be used to sort search results.
  137. info_xml +="<popularity>0</popularity>" # If present, indicates the popularity of the metadata. The value must be positive and high values mean high popularity. The criteria used to set the popularity is left to the writer. Its main purpose is to provide a metadata ordering during a search.
  138. info_xml +="</general><categories> <category name=\"geology\"/><category name=\"datasets\"/></categories>" # GN categories
  139. info_xml +="<privileges>" # GN privileges
  140. info_xml +="<group name=\"all\">\
  141. <operation name=\"view\" />\
  142. <operation name=\"download\"/>\
  143. <operation name=\"dynamic\"/>\
  144. <operation name=\"featured\"/>\
  145. </group>\
  146. <group name=\"intranet\">\
  147. <operation name=\"view\" />\
  148. <operation name=\"download\"/>\
  149. <operation name=\"dynamic\"/>\
  150. <operation name=\"featured\"/>\
  151. </group>\
  152. <group name=\"admin\">\
  153. <operation name=\"view\" />\
  154. <operation name=\"download\"/>\
  155. <operation name=\"dynamic\"/>\
  156. <operation name=\"featured\"/>\
  157. <operation name=\"notify\"/>\
  158. </group>\
  159. </privileges>"
  160. info_xml +="<public/>" # GN public files
  161. info_xml +="<private/></info>" # GN private files - require authentication
  162. #print info_xml # debug
  163.  
  164. # create tmp/info.xml file
  165. try:
  166. f = open(info_xml_file,'w')
  167. f.write(info_xml)
  168. f.close()
  169. True
  170. except:
  171. print " EXCEPTION: failed to write "+info_xml_file
  172.  
  173. # mef file name
  174. zfilename = uuid+".mef"
  175. # create list files and folders to archive
  176. archive_list = ['metadata.xml', 'info.xml', 'public', 'private']
  177. # go to temporary directory
  178. os.chdir(tmp_path+mef_dir_name)
  179. #print os.getcwd() # debug
  180. # zip and copy mef if it does not already exist
  181. if os.path.exists(mef_path+zfilename) == False:
  182. zout = zipfile.ZipFile(mef_path+zfilename, "w")
  183. # add files and folders to mef files
  184. for fname in archive_list:
  185. zout.write(fname)
  186. zout.close()
  187. print " creating "+mef_path+zfilename
  188. else:
  189. #print " XML file exists"
  190. print " "+mef_path+zfilename+" already exists - skipping it"
  191. pass
  192.  
  193. # go up a level of temporary directory
  194. os.chdir(tmp_path)
  195. # delete tmp stuff
  196. try:
  197. shutil.rmtree(tmp_path+mef_dir_name)
  198. print " deleted "+tmp_path+mef_dir_name+" directory"
  199. except OSError:
  200. print " EXCEPTION: failed to delete "+tmp_path+mef_dir_name
  201. return
  202. else:
  203. print " "+uuid+".mef already exists - skipping it"
  204. print "***** END Create MEF "+strftime("%Y-%m-%d %H:%M:%S")+" *****"
  205.  
  206. if __name__=="__main__":
  207. createMef()
  208. #cProfile.run('createMef()') # execution performance information
Add Comment
Please, Sign In to add comment