# elev_data.py

import urllib2
import re

# how far back in days to look
day_count = 3

# seconds to wait on the server before giving up, so a stalled
# connection raises an error instead of hanging the script forever
download_timeout = 60

# report URL; the query string asks for dataType=Elev at location
# "Prado (GOES)" for the last `day_count` days, with req=Text
url = 'http://198.17.86.43/cgi-bin/cgiwrap/zinger/slBasin2Hgl.py?dataType=Elev&locn=Prado+%28GOES%29&days=' + str(day_count) + '&req=Text'

# download the HTML
# (print with one parenthesised argument emits exactly the same text
# as the bare print statement)
print("Downloading " + str(day_count) + " day(s) of levels")
response = urllib2.urlopen(url, timeout=download_timeout)
try:
    elev_data_page = response.read()
finally:
    # always release the connection, even if the read fails part-way
    response.close()

# Regular expression for the hidden data. Each record starts a line and
# looks like:  (<digits and dots>, u'<8 digits> <4 digits>')
# A raw string keeps the backslashes intact for the regex engine, and
# `+` (rather than `*`) requires at least one character in the value so
# an empty value is skipped instead of crashing float() below.
# NOTE(review): a leading '-' is not matched, so negative values would be
# skipped silently - confirm none are expected in this report.
hidden_data_regex = r"^\(([0-9.]+), u'([0-9]{8} [0-9]{4})'\)"

# pull all the hidden data from the HTML as (level, timestamp) string pairs
all_levels = re.findall(hidden_data_regex, elev_data_page, re.MULTILINE)

# if you want to do level comparisons, the levels need to
# be numbers instead of strings, convert and round them
# to 2 decimals using a list comprehension
all_levels = [(round(float(level), 2), time) for (level, time) in all_levels]

# there's no structure to the data in the web-page, so group the values
# by timestamp in a single pass, keeping them in page order within each
# timestamp (this avoids re-scanning every record once per timestamp)
levels_by_time = {}
for (level, time) in all_levels:
    levels_by_time.setdefault(time, []).append(level)

# all the unique timestamps, in sorted (string) order
unique_times = sorted(levels_by_time)

# now, lets assume every value should have 4 data points...
# discard the ones that don't. FYI: only data on the hour has
# 4 points - change this if quarter-hourly data is needed.
# The four values are taken positionally, in the order they appear
# in the page for that timestamp.
level_table = []
ignored_counter = 0
for one_time in unique_times:
    data_points = levels_by_time[one_time]
    if len(data_points) == 4:
        level_table.append({'timestamp': one_time,
                            'elevation': data_points[0],
                            'storage': data_points[1],
                            'average_inflow': data_points[2],
                            'instantaneous_outflow': data_points[3]
                            })
    else:
        # uncomment to see which timestamps are being dropped:
        #print('Ignoring timestamp: \'' + one_time + '\'. Only ' + str(len(data_points)) + ' data points')
        ignored_counter += 1

# note: the counter holds the number of discarded timestamps
print('Ignoring ' + str(ignored_counter) + ' data points with insufficient info.')

# predicate used to pick out the midnight values
def true_if_midnight(item):
    """Return True when the entry's 'timestamp' string ends in '0000'."""
    return item['timestamp'].endswith('0000')


# keep only the table entries whose timestamp passes the midnight test
midnight_table = [entry for entry in level_table if true_if_midnight(entry)]

# a function to insert hyphens and colons to make the date pretty
def reformat_date(time):
    """Insert hyphens and a colon into a raw timestamp to make it pretty.

    The input is expected to look like '<8 digits> <4 digits>'. The first
    eight characters are split 2-2-4 and joined with hyphens; the four
    characters after the separator (positions 9-12) are split 2-2 and
    joined with a colon. The two halves are separated by a single space.
    """
    date_part = '-'.join((time[0:2], time[2:4], time[4:8]))
    clock_part = ':'.join((time[9:11], time[11:13]))
    return date_part + ' ' + clock_part


# banner announcing the CSV section
print("")
print("CSV formatted:")
print("---------------------------------------------")
print("")

# header row
print('Timestamp,Elevation,Storage,Average Inflow,Instantaneous Outflow')

# keys of each table entry, in the same order as the header columns
value_columns = ('elevation', 'storage', 'average_inflow', 'instantaneous_outflow')

# one comma-separated line per midnight entry: the prettified timestamp
# followed by the str() of each value
for row in midnight_table:
    fields = [reformat_date(row['timestamp'])]
    fields.extend(str(row[column]) for column in value_columns)
    print(','.join(fields))