Create GeoNetwork MEF files from ISO 19139 XML through Python

Submitted by wgrunberg on Fri, 11/06/2009 - 10:11am
This is an example Python script that showcases the creation of GeoNetwork Metadata Exchange Format 1.1 (MEF) archives from ISO 19139 metadata XML files. It has been tested on Windows XP with Python 2.6.
#!/usr/bin/env python
# this is gn_iso19139_to_mef_example.py

"""
Example script to create GeoNetwork's Metadata Exchange Format 1.1 (MEF) archive from ISO 19139 metadata XML files

MEF files are ZIP archives with the following structure:
+-<uuid>.mef        zip archive with the metadata's UUID (must be valid) as the file name
 | -info.xml        GeoNetwork (GN) specific metadata such as privileges, related data & thumbnail image files, etc.
 | -metadata.xml    ISO 19139 metadata record
 | +public          directory with public thumbnail and data files - can be empty
 | +private         directory with private (GN authentication required) data files such as shape files etc. - can be empty

Usage: make sure to edit mef_siteId, info_xml, etc.

Python 2.6
Wolfgang Grunberg
Arizona Geological Survey
11/06/2009
"""

# Library Imports - not all may be needed
import os
import sys
import cProfile
import shutil
import zipfile
import mimetypes
from xml.dom import minidom
from xml.dom import Node
from time import strftime

# module globals and constants
#  globals
__author__ = "Wolfgang Grunberg"
__copyright__ = "Copyright 2009, Arizona Geological Survey"
__credits__ = ["Wolfgang Grunberg", "the Internets"]
__license__ = "GPL"
__version__ = "1.0.0"
__maintainer__ = "Wolfgang Grunberg"
__email__ = "wgrunberg@azgs.az.gov"
__status__ = "Prototype"                  # "Prototype", "Development", or "Production"


# some settings

# ISO metadata folder path
iso19139_xml_path = "C:\\tmp\\xml_test\\gn_iso19139\\"
# MEF metadata folder path
mef_path = "C:\\tmp\\xml_test\\gn_mef\\"

# temporary work location
tmp_path = "C:\\tmp\\"          # Temporary workspace to place MEF content before zipping up
mef_dir_name = "temp_mef\\"     # Temporary directory that is created and deleted
# temporary file/folder structure
metadata_xml_file = tmp_path+mef_dir_name+"metadata.xml"
info_xml_file = tmp_path+mef_dir_name+"info.xml"
private_dir = tmp_path+mef_dir_name+"private"
public_dir = tmp_path+mef_dir_name+"public"


def createMef():
    """
    Extract ISO 19139 metadata and create MEF archive with necessary files and folders
    """
    
    print "***** START Create MEF "+strftime("%Y-%m-%d %H:%M:%S")+" *****"
    uuid = "missing"    # metadata record UUID dummy. NOTE: this must be a valid UUID!
    create_date = strftime("%Y-%m-%d %H:%M:%S")     # dummy metadata creation date
    mef_siteId = "00000000-0000-0000-0000-000000000000"     # metadata creator UUID. NOTE: this must be a valid UUID!
    
    # get list of file names from ISO directory
    try:
        dir = os.listdir(iso19139_xml_path)
        #print dir    #debug
    except os.error:
        print "    EXCEPTION: ISO 19139 metadata directory does not exits ("+iso19139_xml_path+") "
        return
    
    # go through each ISO 19139 metadata file
    for file_name in dir:
        #print file_name    #debug
        iso_source_file = iso19139_xml_path+file_name   # path to metadata file
        
        # retrieve UUID from <gmd:fileIdentifier><gco:CharacterString>. NOTE: this must be a valid UUID!
        with open(iso_source_file, 'r') as f:
            iso_metadata = f.read()
            print "    Reading metadata: "+file_name
            #print iso_metadata    #debug
            # Load ISO metadata string into XML object
            xmldoc = minidom.parseString(iso_metadata)
            #print xmldoc.toxml()    #debug
            uuid = xmldoc.getElementsByTagName('gmd:fileIdentifier')[0].getElementsByTagName('gco:CharacterString')[0].firstChild.data
            #print uuid    #debug
            create_date = xmldoc.getElementsByTagName('gmd:dateStamp')[0].getElementsByTagName('gco:DateTime')[0].firstChild.data
            #print create_date   #debug
            xmldoc.unlink()     # cleanup DOM for improved performance
        f.close()
        True
        
        # if the MEF file doesn't exist, create it
        if os.path.exists(mef_path+uuid+'.mef') == False:
            # create temporary directory
            try:
                os.mkdir(tmp_path+mef_dir_name)
            except OSError, err:
                print >>sys.stderr, "    EXCEPTION: ", err
                return
            # create tmp/public/ and tmp/private/ directories
            try:
                os.mkdir(private_dir)
            except OSError, err:
                print >>sys.stderr, "    EXCEPTION: ", err
                return
            try:
                os.mkdir(public_dir)
            except OSError, err:
                print >>sys.stderr, "    EXCEPTION: ", err
                return
            
            # copy iso metadata to <temporary location>/metadata.xml
            shutil.copy2(iso_source_file, metadata_xml_file)
            
            # construct info.xml file
            info_xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>"
            info_xml += "<info version=\"1.0\"><general>"
            info_xml +="<uuid>"+uuid+"</uuid>"          # universally unique identifier assigned to the metadata and must be a valid UUID. This element is optional and, when omitted, the reader should generate one
            info_xml +="<createDate>"+create_date+"</createDate>"   # when the metadata was created
            info_xml +="<changeDate>"+strftime("%Y-%m-%d %H:%M:%S")+"</changeDate>"     # most recent change to the metadata. 
            info_xml +="<siteId>"+mef_siteId+"</siteId>"    # This is an UUID that identifies the actor that created the metadata and must be a valid UUID. When the UUID element is missing, this element should be missing too. If present, it will be ignored.
            info_xml +="<siteName>Arizona Geological Survey</siteName>"     # Site Name
            info_xml +="<schema>iso19139</schema>"      # dublin-core, fgdc-std, iso19115, iso19139
            info_xml +="<format>full</format>"          # MEF format: simple, partial, full
            info_xml +="<localId>"+uuid+"</localId>"    # OPTIONAL If present, indicates the id used locally by the sourceId actor to store the metadata. Its purpose is just to allow the reuse of the same local id when reimporting a metadata.
            info_xml +="<isTemplate>false</isTemplate>" # A boolean field that indicates if this metadata is a template used to create new ones. There is no real distinction between a real metadata and a template but some actors use it to allow fast metadata creation.
            info_xml +="<rating>0</rating>"             # If present, indicates the users' rating of the metadata ranging from 1 (a bad rating) to 5 (an excellent rating). The special value 0 means that the metadata has not been rated yet. Can be used to sort search results.
            info_xml +="<popularity>0</popularity>"     # If present, indicates the popularity of the metadata. The value must be positive and high values mean high popularity. The criteria used to set the popularity is left to the writer. Its main purpose is to provide a metadata ordering during a search.
            info_xml +="</general><categories> <category name=\"geology\"/><category name=\"datasets\"/></categories>"  # GN categories
            info_xml +="<privileges>"                   # GN privileges
            info_xml +="<group name=\"all\">\
            <operation name=\"view\" />\
            <operation name=\"download\"/>\
            <operation name=\"dynamic\"/>\
            <operation name=\"featured\"/>\
            </group>\
            <group name=\"intranet\">\
            <operation name=\"view\" />\
            <operation name=\"download\"/>\
            <operation name=\"dynamic\"/>\
            <operation name=\"featured\"/>\
            </group>\
            <group name=\"admin\">\
            <operation name=\"view\" />\
            <operation name=\"download\"/>\
            <operation name=\"dynamic\"/>\
            <operation name=\"featured\"/>\
            <operation name=\"notify\"/>\
            </group>\
            </privileges>"
            info_xml +="<public/>"              # GN public files
            info_xml +="<private/></info>"      # GN private files - require authentication 
            #print info_xml      # debug
            
            # create tmp/info.xml file
            try: 
                f = open(info_xml_file,'w')
                f.write(info_xml)
                f.close()
                True
            except:
                print "    EXCEPTION: failed to write "+info_xml_file
            
            # mef file name
            zfilename = uuid+".mef"
            # create list files and folders to archive
            archive_list = ['metadata.xml', 'info.xml', 'public', 'private']
           # go to temporary directory
            os.chdir(tmp_path+mef_dir_name)
            #print os.getcwd()   # debug
            # zip and copy mef if it does not already exist
            if os.path.exists(mef_path+zfilename) == False:
                zout = zipfile.ZipFile(mef_path+zfilename, "w")
                # add files and folders to mef files
                for fname in archive_list:
                    zout.write(fname)
                zout.close()
                print "    creating "+mef_path+zfilename
            else:
                #print "   XML file exists"
                print "    "+mef_path+zfilename+" already exists - skipping it"
                pass
            
            # go up a level of temporary directory
            os.chdir(tmp_path)
            # delete tmp stuff
            try:
                shutil.rmtree(tmp_path+mef_dir_name)
                print "    deleted "+tmp_path+mef_dir_name+" directory"
            except OSError:
                print "    EXCEPTION: failed to delete "+tmp_path+mef_dir_name
                return
        else:
            print "    "+uuid+".mef already exists - skipping it"
    print "***** END Create MEF "+strftime("%Y-%m-%d %H:%M:%S")+" *****"

if __name__=="__main__":
    createMef()
    #cProfile.run('createMef()')      # execution performance information
Files:
gn_iso19139_to_mef_example.py_.txt
Groups:
USGIN Lab

Create GeoNetwork MEF files from ISO 19139 XML through Python

Community

ETL Debug Blog

ETL Debug Blog

Group notifications

Related Content

Group Posts

User login