Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import re
from contextlib import closing

try:
    import urllib2
except ImportError:
    # Python 3 moved urlopen into urllib.request; keep the old name bound
    # so the rest of the script can keep calling urllib2.urlopen().
    import urllib.request as urllib2
# how far back in days to look
day_count = 3
url = ('http://198.17.86.43/cgi-bin/cgiwrap/zinger/slBasin2Hgl.py'
       '?dataType=Elev&locn=Prado+%28GOES%29&days=' + str(day_count) +
       '&req=Text')

# download the HTML
print("Downloading " + str(day_count) + " day(s) of levels")
# closing() releases the connection even if read() raises; the object
# returned by Python 2's urlopen is not a context manager on its own.
with closing(urllib2.urlopen(url)) as response:
    elev_data_page = response.read()
# Python 3 returns bytes here, while the pattern below is a text pattern.
if not isinstance(elev_data_page, str):
    elev_data_page = elev_data_page.decode('utf-8', 'replace')

# regular expression format for the hidden data: a line that starts with
# "(<digits and dots>, u'<8 digits> <4 digits>')"
# NOTE(review): "[0-9.]*" also matches an empty value, which float() below
# rejects with ValueError - confirm whether the page can ever emit one.
hidden_data_regex = r"^\(([0-9.]*), u'([0-9]{8} [0-9]{4})'\)"

# pull all the hidden data from the HTML. if you want to do level
# comparisons, the levels need to be numbers instead of strings, so
# convert and round them to 2 decimals while collecting them.
all_levels = [
    (round(float(level), 2), timestamp)
    for (level, timestamp)
    in re.findall(hidden_data_regex, elev_data_page, re.MULTILINE)
]

# there's no structure to the data in the web-page, gotta do some
# hard work here. first, bucket the values by timestamp in a single pass,
# keeping the values of each timestamp in page order.
levels_by_time = {}
for level, timestamp in all_levels:
    levels_by_time.setdefault(timestamp, []).append(level)


def _chronological_key(timestamp):
    """Return a sort key that compares the year digits before the rest.

    Sorting the raw strings puts a later year's first entries ahead of the
    previous year's last ones, because the four digits at [4:8] come after
    the other date digits. Within a single year the resulting order is
    the same as a plain string sort.

    NOTE(review): assumes [4:8] is the year, as the dd-dd-dddd layout
    produced by reformat_date suggests - confirm against the feed.
    """
    return (timestamp[4:8], timestamp[0:4], timestamp[9:13])


unique_times = sorted(levels_by_time, key=_chronological_key)

# now, lets assume every value should have 4 data points...
# discard the ones that don't. FYI: only data on the hour has
# 4 points - change this if quarter-hourly data is needed.
level_table = []
ignored_counter = 0
for one_time in unique_times:
    data_points = levels_by_time[one_time]
    if len(data_points) != 4:
        ignored_counter += 1
        continue
    # the four values are taken positionally, in the order they appeared
    elevation, storage, average_inflow, instantaneous_outflow = data_points
    level_table.append({
        'timestamp': one_time,
        'elevation': elevation,
        'storage': storage,
        'average_inflow': average_inflow,
        'instantaneous_outflow': instantaneous_outflow,
    })
print('Ignoring ' + str(ignored_counter) + ' data points with insufficient info.')


# predicate used to keep only the midnight values
def true_if_midnight(item):
    """Return True when the entry's timestamp ends with '0000'."""
    return item['timestamp'].endswith('0000')


# keep the midnight values only (a real list on both Python 2 and 3)
midnight_table = [entry for entry in level_table if true_if_midnight(entry)]
# a function to insert hyphens and colons to make the date pretty
def reformat_date(time):
    """Return *time* with separators inserted between its digit groups.

    The input is expected in the shape captured by the page regex: eight
    digits, one separator character, four digits. For example
    '01152014 0000' becomes '01-15-2014 00:00'.

    The character at index 8 is skipped rather than copied, and no
    validation is done: shorter input simply yields shorter (or empty)
    groups, because the function only slices.
    """
    date_part = '-'.join((time[0:2], time[2:4], time[4:8]))
    clock_part = time[9:11] + ':' + time[11:13]
    return date_part + ' ' + clock_part
# Emit the midnight rows as CSV on stdout. Every print() call takes one
# argument, so the output is the same under Python 2's print statement
# and Python 3's print function.
print("")
print("CSV formatted:")
print("---------------------------------------------")
print("")
print('Timestamp,Elevation,Storage,Average Inflow,Instantaneous Outflow')
for level in midnight_table:
    # column order must match the header line printed above
    print(','.join([
        reformat_date(level['timestamp']),
        str(level['elevation']),
        str(level['storage']),
        str(level['average_inflow']),
        str(level['instantaneous_outflow']),
    ]))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement