Advertisement
Guest User

elev_data.py

a guest
Sep 17th, 2013
192
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.66 KB | None | 0 0
import contextlib
import re
import urllib2

  4. # how far back in days to look
  5. day_count = 3
  6.  
  7. url = 'http://198.17.86.43/cgi-bin/cgiwrap/zinger/slBasin2Hgl.py?dataType=Elev&locn=Prado+%28GOES%29&days=' + str(day_count) + '&req=Text'
  8.  
  9. # download the HTML
  10. print "Downloading " + str(day_count) + " day(s) of levels"
  11. elev_data_page = urllib2.urlopen(url).read()
  12.  
  13. # regular expression format for the hidden data
  14. hidden_data_regex = '^\(([0-9.]*), u\'([0-9]{8} [0-9]{4})\'\)'
  15.  
  16. # pull all the hidden data from the HTML
  17. all_levels = re.findall(hidden_data_regex, elev_data_page, re.MULTILINE)
  18.  
  19. # if you want to do level comparisons, the levels need to
  20. # be numbers instead of strings, convert and round them
  21. # to 2 decimals using a list comprehension
  22. all_levels = [ (round(float(level), 2), time) for (level, time) in all_levels ]
  23.  
  24. # there's no structure to the data in the web-page, gotta do some
  25. # hard work here. first, get all the unique timestamps.
  26. unique_times = sorted(list(set([ time for (level, time) in all_levels ])))
  27.  
  28. # now, lets assume every value should have 4 data points...
  29. # discard the ones that don't. FYI: only data on the hour has
  30. # 4 points - change this if quarter-hourly data is needed.
  31. level_table = []
  32. ignored_counter = 0
  33. for one_time in unique_times:
  34.     data_points = filter(lambda time: time[1] == one_time, all_levels)
  35.     if len(data_points) == 4:
  36.         level_table.append({'timestamp': one_time,
  37.                             'elevation': data_points[0][0],
  38.                             'storage': data_points[1][0],
  39.                             'average_inflow': data_points[2][0],
  40.                             'instantaneous_outflow': data_points[3][0]
  41.                             })
  42.     else:
  43.         #print 'Ignoring timestamp: \'' + one_time + '\'. Only ' + str(len(data_points)) + ' data points'
  44.         ignored_counter += 1
  45.  
  46. print 'Ignoring ' + str(ignored_counter) + ' data points with insufficient info.'
  47.  
  48. # filter out only the midnight values (using a lambda function)
  49. true_if_midnight = lambda item: item['timestamp'].endswith('0000')
  50.  
  51. # use the lambda function to filter the midnight values only
  52. midnight_table = filter(true_if_midnight, level_table)
  53.  
  54. # a function to insert hyphens and colons to make the date pretty
  55. def reformat_date(time):
  56.     return time[0:2] + '-' + time[2:4] + '-' + time[4:8] + ' ' + time[9:11] + ':' + time[11:13]
  57.  
  58.  
  59. print ""
  60. print "CSV formatted:"
  61. print "---------------------------------------------"
  62. print ""
  63.  
  64. print 'Timestamp,Elevation,Storage,Average Inflow,Instantaneous Outflow'
  65.  
  66. for level in midnight_table:
  67.     print reformat_date(level['timestamp']) + ',' + str(level['elevation']) + ',' + \
  68.                     str(level['storage']) + ',' + str(level['average_inflow']) + ',' + \
  69.                     str(level['instantaneous_outflow'])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement