Untitled

##
##
## __________ .____     .___   _________
## \______   \|    |    |   | /   _____/
##  |       _/|    |    |   | \_____  \
##  |    |   \|    |___ |   | /        \
##  |____|_  /|_______ \|___|/_______  /
##         \/         \/             \/
##    _____            __               .___         __
##   /     \    ____ _/  |_ _____     __| _/_____  _/  |_ _____
##  /  \ /  \ _/ __ \\   __\\__  \   / __ | \__  \ \   __\\__  \
## /    Y    \\  ___/ |  |   / __ \_/ /_/ |  / __ \_|  |   / __ \_
## \____|__  / \___  >|__|  (____  /\____ | (____  /|__|  (____  /
##         \/      \/            \/      \/      \/            \/
#
# Ben Sainsbury
# 2013.08.08
#
# Creates XML metadata using the record returned from the
# database.
# Here is a rough sketch of the data model for the
# XML export:
#
# - Root Meta Layer Info
#    - Tables
#      - Columns
#    - Lineage
#
# Note that the stored function
# METADATA.GETDISCOVERYMETA returns 'stringified
# JSON objects for the child elements = easily
# parsed by Python into dict/list objects
#
###################################################

import cx_Oracle, os, shutil, Image, base64
from HTMLParser import HTMLParser
from sys import path
path.append("//alex/rlis/drcTools/Libraries/Python/")
import MetroUtil as mu
#from validation import validate

OUTPUT_DIRECTORY = r'\\alex\rlis\rlislite\DVD\ESRISHAPEFILES\\'

IMAGE_LIBRARY = r'\\atlas\www\prod\library\rlisdiscovery\browse_graphic'

def append_text(val):
    global f
    f.write(val+'\n')

def format(wrap, val):
    instr = val.replace("&nbsp", " ")
    if "<" in instr or "&" in instr:
        instr= "<![CDATA[" + instr + "]]>"
    instr=instr.replace("\n","")
    instr=instr.strip()
    return wrap.format(word=instr)

def encode_thumbnail(filename):
    im = Image.open(IMAGE_LIBRARY+os.sep+filename)
    im.convert('RGB').save(r"c:\temp\outfile.jpg")
    with open(r"c:\temp\outfile.jpg", "rb") as image_file:
        encoded_string = base64.b64encode(image_file.read())
    return encoded_string

def update_layer(layer_id, field, value):
    try:
        stringval = value
        numericval = -99

        if type(value) == int or type(value) == float:
            numericval=value
            stringval=''

        params=[layer_id, field, stringval, numericval]
        conn = cx_Oracle.connect(mu.METADATA_RLISPROD_CONN_STRING)
        curs = conn.cursor()
        out_parameter = curs.var(cx_Oracle.NUMBER)
        result = curs.callfunc('UPDATE_LAYER', out_parameter, params)

        print result
    except Exception as e:
        print "Update failed: " + str(e)
    finally:
        conn.close()

def update_layer_date(layer_id):

    _now = datetime.datetime.now()

    update_layer(layer_id, 'TIME_PERIOD_OF_CONTENT', _now.strftime('%Y%m%d'))
    update_layer(layer_id, 'METADATA_DATE', _now.strftime('%Y%m%d'))

def create_metadata(layer_id, path=None, include_image=True):
    global f

    try:
        conn = cx_Oracle.connect(mu.METADATA_RLISPROD_CONN_STRING)

        curs = conn.cursor()

        out_parameter = curs.var(cx_Oracle.CURSOR)

        params =[]

        if type(layer_id) is int:
            params=[layer_id, '']
        else:
            params=[0,layer_id]

        data = curs.callfunc('GETDISCOVERYMETA', out_parameter, params)

        fieldNames = map(lambda foo:  foo[0], data.description)

        row = dict(zip(fieldNames, data.fetchone()))

        row["LINEAGE"] = row["LINEAGE"].read();
        row["TABLES"] = row["TABLES"].read();

        #Quick little bit of defense here
        for k in row.keys():
            row[k]='' if row[k] is None else row[k]
            row[k]='' if row[k] =='none' else row[k]

        lineage = eval(row["LINEAGE"])["lineage"]
        tables = eval(row["TABLES"])["tables"]

    except Exception, e:
        print str(e)
    finally:
        conn.close()

    file_name = row['FILE_NAME'].split('/')
    file_name = file_name[len(file_name)-1]

    out_path = os.path.join(OUTPUT_DIRECTORY, str(row['CATEGORY']).upper())

    if not os.path.exists(out_path):
        os.makedirs(out_path)

    if path is None:
        path= os.path.join(out_path, file_name+'.xml')

    f=open(path, 'w')

    append_text('<?xml version="1.0"?>')
    append_text('<metadata>')
    append_text('	<idinfo>')
    append_text('		<citation>')
    append_text('			<citeinfo>')
    append_text(format('				<origin>{word}</origin>', row['ORIGINATOR']))
    append_text(format('				<pubdate>{word}</pubdate>', row['PUBLICATION_DATE']))
    append_text(format('				<title>{word}</title>', row['TITLE']))
    append_text('			</citeinfo>')
    append_text('		</citation>')
    append_text('		<descript>')
    append_text(format('			<abstract>{word}</abstract>', row['ABSTRACT']))
    append_text(format('			<purpose>{word}</purpose>', row['PURPOSE']))
    append_text(format('			<supplinf>{word}</supplinf>' , row['SUPPLEMENTAL_INFORMATION']))
    append_text('		</descript>')
    append_text('		<timeperd>')
    append_text('			<timeinfo>')
    append_text('				<sngdate>')
    append_text(format('					<caldate>{word}</caldate>', row['TIME_PERIOD_OF_CONTENT']))
    append_text('				</sngdate>')
    append_text('			</timeinfo>')
    append_text('			<current>Ground condition</current>')
    append_text('		</timeperd>')
    append_text('		<status>')
    append_text(format('			<progress>{word}</progress>', row['PROGRESS']))
    append_text(format('			<update>{word}</update>', row['MAINT_UPDATE_FREQUENCY']))
    append_text('		</status>')
    append_text('		<spdom>')
    append_text('			<bounding>')
    append_text(format('				<westbc>{word}</westbc>', row['WEST_BOUNDING_COORDINATE']))
    append_text(format('				<eastbc>{word}</eastbc>', row['EAST_BOUNDING_COORDINATE']))
    append_text(format('				<northbc>{word}</northbc>', row['NORTH_BOUNDING_COORDINATE']))
    append_text(format('				<southbc>{word}</southbc>', row['SOUTH_BOUNDING_COORDINATE']))
    append_text('			</bounding>')
    append_text('		</spdom>')
    append_text('		<keywords>')
    append_text('			<theme>')
    append_text(format('				<themekt>{word}</themekt>', row['THEME_KEYWORD_THESAURUS']))

    for keyword in row['THEME_KEYWORD'].split(','):
        append_text(format('				<themekey>{word}</themekey>', keyword.strip()))

    append_text('			</theme>')
    append_text('			<place>')
    append_text(format('				<placekt>{word}</placekt>', row['PLACE_KEYWORD_THESAURUS']))

    for place in row['PLACE_KEYWORD'].split(','):
        append_text(format('				<placekey>{word}</placekey>', place.strip()))

    append_text('			</place>')
    append_text('		</keywords>')
    append_text(format('		<accconst>{word}</accconst>', row['ACCESS_CONSTRAINTS']))
    append_text(format('		<useconst>{word}</useconst>', row['USE_CONSTRAINTS']))

    append_text('    <browse>')
    append_text(format('        <browsen>http://library.oregonmetro.gov/rlisdiscovery/browse_graphic/{word}</browsen>', row['BROWSE_GRAPHIC_FILE_NAME']))
    append_text(format('        <browsed>{word}</browsed>', row['BROWSE_GRAPHIC_FILE_DESC']))
    append_text(format('        <browset>{word}</browset>', row['BROWSE_GRAPHIC_FILE_TYPE']))
    append_text('    </browse>')
    append_text('	</idinfo>')
    append_text('	<dataqual>')
    append_text('		<logic></logic>')
    append_text('		<complete></complete>')
    append_text('		<posacc>')
    append_text('			<horizpa>')

    if row['HORIZONTAL_POS_ACC_LINK'] == "Taxlots":
        append_text('\t\t\t\t<horizpar>Registered to tax lots. Street center lines were digitized from aerial orthophotography at +/- 10 feet accuracy. Tax lots were fit to the street centerlines. Beaverton, Oregon City and Tigard planning areas were improved to +/- 5 feet accuracy. Multnomah County was created with coordinate geometry using better control.</horizpar>')
    elif row['HORIZONTAL_POS_ACC_LINK'] == "Streets":
        append_text('\t\t\t\t<horizpar>Registered to streets. The street linework is contained within the street rights-of-way as shown on the tax lot layer. As the tax lot horizonal accuracy is improved, streets are adjusted accordingly. The tax lots are digitized at +/- 10 feet accuracy. Beaverton, Oregon City and Tigard planning areas are improved to +/- 5 feet accuracy. Multnomah County was created with coordinate geometry using better control.</horizpar>')
    else:
        append_text('\t\t\t\t<horizpar>'+row['HORIZONTAL_POS_ACCURACY']+'</horizpar>')

    append_text('			</horizpa>')
    append_text('		</posacc>')
    append_text('		<lineage>')

    for process in lineage:
        append_text('			<procstep>')
        append_text(format('				<procdesc>{word}</procdesc>', process['desc']))
        append_text(format('				<procdate>{word}</procdate>', process['date']))
        append_text('			</procstep>')

    append_text('		</lineage>')
    append_text('	</dataqual>')
    append_text('	<spdoinfo>')
    append_text(format('		<direct>{word}</direct>', row['DIRECT_SPATIAL_REF_METHOD']))

    if row['DIRECT_SPATIAL_REF_METHOD'] == "Vector" or row['DIRECT_SPATIAL_REF_METHOD'] == "Point":
        append_text('		<ptvctinf>')
        append_text('			<sdtsterm>')
        append_text(format('				<sdtstype>{word}</sdtstype>', row['SDTS_PT_AND_VEC_OBJ_TYPE']))
        append_text('			</sdtsterm>')
        append_text('		</ptvctinf>')
    elif row['DIRECT_SPATIAL_REF_METHOD'] == "Raster":
        append_text('		<rastinfo>')
        append_text(format('			<rasttype>{word}</rasttype>', row['RASTER_OBJECT_TYPE']))
        append_text('		</rastinfo>')

    append_text('	</spdoinfo>')
    append_text('	<spref>')
    append_text('		<horizsys>')
    append_text('			<planar>')
    append_text('				<gridsys>')
    append_text(format('					<gridsysn>{word}</gridsysn>', row['GRID_COORD_SYSTEM_NAME']))
    append_text('					<spcs>')
    append_text(format('						<spcszone>{word}</spcszone>', row['SPCS_ZONE_IDENTIFIER']))
    append_text('					</spcs>')
    append_text('				</gridsys>')
    append_text('				<planci>')
    append_text('					<plance></plance>')
    append_text(format('					<plandu>{word}</plandu>', row['PLANAR_DISTANCE_UNITS']))
    append_text('				</planci>')
    append_text('			</planar>')
    append_text('			<geodetic>')
    append_text(format('				<horizdn>{word}</horizdn>', row['HORIZONTAL_DATUM_NAME']))
    append_text('				<ellips></ellips>')
    append_text('				<semiaxis></semiaxis>')
    append_text('				<denflat></denflat>')
    append_text('			</geodetic>')
    append_text('		</horizsys>')
    append_text('	</spref>')

    append_text('	<eainfo>')

    if tables != None:
        for table in tables:
            append_text('		<detailed>')
            append_text('			<enttyp>')
            append_text(format('				<enttypl>{word}</enttypl>', tables[0]['name']))
            append_text(format('				<enttypd>{word}</enttypd>', tables[0]['desc']))
            append_text('				<enttypds></enttypds>')
            append_text('			</enttyp>')

            for col in table['cols']:
                append_text('			<attr>')
                append_text(format('				<attrlabl>{word}</attrlabl>', col['name']))
                append_text(format('				<attrdef>{word}</attrdef>', col['desc']))
                append_text('				<attrdefs></attrdefs>')
                append_text('				<attrdomv>')

                for domain_val in col['domain_values']:
                    append_text('					<edom>')
                    append_text(format('						<edomv>{word}</edomv>', domain_val['value']))
                    append_text(format('						<edomvd>{word}</edomvd>', domain_val['desc']))
                    append_text('						<edomvds></edomvds>')
                    append_text('					</edom>')
                else:
                    append_text('					<udom>NA</udom>')
                append_text('				</attrdomv>')
                append_text('			</attr>')

            append_text('		</detailed>')
    append_text('	</eainfo>')
    append_text('	<distinfo>')
    append_text('		<distrib>')
    append_text('			<cntinfo>')
    append_text('				<cntorgp>')
    append_text(format('					<cntorg>{word}</cntorg>', row['CONTACT_ORGANIZATION']))
    append_text('				</cntorgp>')
    append_text('				<cntaddr>')
    append_text('					<addrtype></addrtype>')
    append_text(format('					<address>{word}</address>', row['ADDRESS']))
    append_text(format('					<city>{word}</city>', row['CITY']))
    append_text(format('					<state>{word}</state>',row['STATE_OR_PROVINCE']))
    append_text(format('					<postal>{word}</postal>',row['POSTAL_CODE']))
    append_text('				</cntaddr>')
    append_text(format('				<cntvoice>{word}</cntvoice>', row['CONTACT_VOICE_TELEPHONE']))
    append_text(format('				<cntfax>{word}</cntfax>', row['CONTACT_FACSIMILE_TELEPHONE']))
    append_text(format('				<cntemail>{word}</cntemail>', row['CONTACT_E_MAIL_ADDRESS']))
    append_text('			</cntinfo>')
    append_text('		</distrib>')
    append_text(format('		<distliab>{word}</distliab>' ,row['DISTRIBUTION_LIABILITY']))
    append_text('		<stdorder>')
    append_text('			<digform>')
    append_text('				<digtinfo>')
    append_text(format('					<formname>{word}</formname>', row['FORMAT_NAME']))

    try:
      append_text(format('					<transize>{word}</transize>', str(row['TRANSFER_SIZE']/1000)))
    except Exception as e:
      append_text('					<transize>0</transize>')

    append_text('				</digtinfo>')
    append_text('				<digtopt>')
    append_text('					<offoptn>')
    append_text(format('						<offmedia>{word}</offmedia>', row['OFFLINE_MEDIA']))
    append_text(format('						<recfmt>{word}</recfmt>', row['RECORDING_FORMAT']))
    append_text('					</offoptn>')
    append_text('					<onlinopt>')
    append_text('						<computer>')
    append_text('							<networka>')
    append_text(format('								<networkr>{word}</networkr>', row['NETWORK_RESOURCE_NAME']))
    append_text('							</networka>')
    append_text('						</computer>')
    append_text(format('						<accinstr>{word}</accinstr>', row['ACCESS_INSTRUCTIONS']))
    append_text('					</onlinopt>')
    append_text('				</digtopt>')
    append_text('			</digform>')
    append_text(format('			<fees>{word}</fees>', row['FEES']))
    append_text('		</stdorder>')
    append_text('	</distinfo>')
    append_text('	<metainfo>')
    append_text(format('		<metd>{word}</metd>', row['METADATA_DATE']))
    append_text('		<metc>')
    append_text('			<cntinfo>')
    append_text('				<cntorgp>')
    append_text('					<cntorg>Metro Data Resource Center</cntorg>')
    append_text(format('					<cntper>{word}</cntper>', row['CONTACT_PERSON']))
    append_text('				</cntorgp>')
    append_text('				<cntaddr>')
    append_text('					<addrtype>mailing and physical address</addrtype>')
    append_text('					<address>600 NE Grand Ave.</address>')
    append_text('					<city>Portland</city>')
    append_text('					<state>OR</state>')
    append_text('					<postal>97232</postal>')
    append_text('				</cntaddr>')
    append_text(format('				<cntvoice>{word}</cntvoice>', row['CONTACT_VOI_TELE']))
    append_text(format('				<cntfax>{word}</cntfax> ', row['CONTACT_FAX_TELE']))
    append_text(format('				<cntemail>{word}</cntemail>', row['CONTACT_E_MAIL']))
    append_text('			</cntinfo>')
    append_text('		</metc>')
    append_text(format('		<metstdn>{word}</metstdn>', row['METADATA_STANDARD_NAME']))
    append_text(format('		<metstdv>{word}</metstdv>', row['METADATA_STANDARD_VERSION']))
    append_text('	</metainfo>')

    if include_image:
        if row['BROWSE_GRAPHIC_FILE_NAME']:
            encoded_string = encode_thumbnail(row['BROWSE_GRAPHIC_FILE_NAME'])
            append_text('    <Binary>')
            append_text('        <Thumbnail>')
            append_text('            <Data EsriPropertyType="PictureX">'+encoded_string+'</Data>')
            append_text('        </Thumbnail>')
            append_text('    </Binary>')

    append_text('</metadata>')

    f.close()

    if not path: # using this as a proxy for module-level access to this method vs main.
        print "completed metadata file for "+row["TITLE"]+ " (LayerID: "+str(row["LAYER_ID"])+")"
    #validate(out_path+'/'+ file_name+'.xml', "D:/scratch/meta_test/VALIDATION")
    #print "completed metadata validation for "+row["TITLE"]+ " (LayerID: "+str(row["LAYER_ID"])+")"
    return

if __name__ == "__main__":

    layer_id = raw_input("enter layer id (enter 0 to export all):")
    #database = raw_input("enter database to export from (PUB, TEST,DEV, PROD)[Usually prod]")

    if layer_id=='0':
        try:
            conn = cx_Oracle.connect(mu.METADATA_RLISPROD_CONN_STRING)

            curs = conn.cursor()

            x=curs.execute("SELECT distinct l.layer_id FROM metadata_layer l INNER JOIN metadata_layer_format mlf on l.layer_id = mlf.layer_id inner join metadata_category mlc on l.CATEGORY_ID = Mlc.Category_Id where mlf.transfer_on_id = 1 and l.file_name not like 'hillshd%' and Upper(mlc.category) not in ('PHOTO', 'SPECIAL') ORDER by l.layer_id")

            layers_to_export = x.fetchall()

            for layer_id in layers_to_export:
                create_metadata(int(layer_id[0]))

        except Exception as e:
            print "Metadata publish process has experienced an error:"+str(e)
        finally:
            conn.close()
    else:
        create_metadata(int(layer_id))

    print 'process complete'