Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sys # Needed for sys.argv
- import csv
- from datetime import datetime, timedelta
- #from dateutil.relativedelta import relativedelta
def _parse_observation_date(text):
    """Return the calendar date encoded in *text*, or None if unparseable.

    Two layouts are accepted: ISO ``YYYY-MM-DD`` and ``M/D/YY``.
    """
    try:
        # A four-digit year is unambiguous, so it is returned untouched.
        return datetime.strptime(text, '%Y-%m-%d').date()
    except ValueError:
        pass
    try:
        parsed = datetime.strptime(text, '%m/%d/%y').date()
    except ValueError:
        return None
    # strptime maps two-digit years 00-68 to 2000-2068.  The pivot below is
    # carried over from the previous implementation: a two-digit-year date
    # that lands on or after 2004-01-01 is moved back one century.
    # NOTE(review): this presumes the data set ends in 2003 -- confirm.
    if parsed >= datetime(2004, 1, 1).date():
        parsed = parsed.replace(year=parsed.year - 100)
    return parsed


def get_climate(in_filename, out_filename):
    """Read historical weather from in_filename, write climate to out_filename.

    Every input row after the header line is read as
    ``date,precipitation,tmax,tmin`` (for example ``1895-07-01,0.15,77,57``).
    Observations are grouped by calendar day ("MM/DD") and one output row is
    written per day, in calendar order, with the columns::

        Day,Avg precip,Avg low,Avg high,Min low,Max high,Min low year,Max high year

    Averages are taken over the observations actually accepted for that day
    and rounded to two decimals.  The record columns hold the lowest ``tmin``
    and highest ``tmax`` seen for the day and the year each first occurred.

    Row handling:

    * an empty precipitation cell counts as 0;
    * a row with a missing or non-numeric value, or with fewer than four
      columns, is reported on stderr and skipped;
    * a row whose date matches neither supported format is reported on
      stderr and skipped;
    * completely blank lines are ignored.

    Example invocation of the script::

        python climate.py test.csv result.csv

    Parameters
    ----------
    in_filename : str
        name of the input file
    out_filename : str
        name of the output file
    """
    # "MM/DD" -> running totals and records for that calendar day.
    daily = {}

    with open(in_filename, 'r', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=",")
        next(reader, None)  # skip the header; tolerate an empty file
        for row in reader:
            if not row:
                continue
            try:
                precipitation = float(row[1]) if row[1] != "" else 0.0
                tmax = float(row[2])
                tmin = float(row[3])
            except (IndexError, ValueError):
                sys.stderr.write('Bad number in line' + str(row) + '\n')
                continue

            observed = _parse_observation_date(row[0])
            if observed is None:
                # Never fall back to the date of an earlier row.
                sys.stderr.write('Bad date in line' + str(row) + '\n')
                continue

            month_day = observed.strftime("%m/%d")
            year = str(observed.year)

            day = daily.get(month_day)
            if day is None:
                daily[month_day] = {
                    "count": 1,
                    "precip": precipitation,
                    "low": tmin,
                    "high": tmax,
                    "min_low": tmin,
                    "max_high": tmax,
                    "min_low_year": year,
                    "max_high_year": year,
                }
                continue

            day["count"] += 1
            day["precip"] += precipitation
            day["low"] += tmin
            day["high"] += tmax
            # Strict comparisons keep the earliest year when a record is tied.
            if tmin < day["min_low"]:
                day["min_low"] = tmin
                day["min_low_year"] = year
            if tmax > day["max_high"]:
                day["max_high"] = tmax
                day["max_high_year"] = year

    # The output is opened only once the input has been read completely, and
    # the context manager guarantees it is flushed and closed.
    with open(out_filename, 'w') as result:
        result.write('Day,Avg precip,Avg low,Avg high,Min low,Max high,Min low year,Max high year\n')
        for month_day in sorted(daily):  # zero-padded "MM/DD" sorts by date
            day = daily[month_day]
            observations = day["count"]
            fields = [
                month_day,
                str(round(day["precip"] / observations, 2)),
                str(round(day["low"] / observations, 2)),
                str(round(day["high"] / observations, 2)),
                str(day["min_low"]),
                str(day["max_high"]),
                day["min_low_year"],
                day["max_high_year"],
            ]
            result.write(",".join(fields) + "\n")
def get_average(dictionary, count):
    """Return a new dict mapping each key to its value divided by *count*.

    The input dictionary is left unmodified.
    """
    return {key: total / count for key, total in dictionary.items()}
def usage():
    """Complain that the user ran the program incorrectly, then exit.

    The usage text is written to stderr.

    Raises
    ------
    SystemExit
        Always, with exit status 2, so that shells and calling scripts can
        tell a command-line mistake apart from a successful run.
    """
    sys.stderr.write('Usage:\n')
    sys.stderr.write(' python climate.py <input-file.csv> <output-file.csv>\n')
    sys.exit(2)
def main():
    """Command-line entry point: check the arguments and run get_climate.

    Exactly two arguments are required after the script name: the input CSV
    file followed by the output CSV file.  With any other number of
    arguments the usage message is shown and the program exits.
    """
    arguments = sys.argv[1:]
    if len(arguments) != 2:
        usage()
        sys.exit()
    in_filename, out_filename = arguments
    get_climate(in_filename, out_filename)
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement