Advertisement
KillianMills

datajoin.py

Aug 31st, 2016
134
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.24 KB | None | 0 0
  1. import csv
  2. from collections import OrderedDict
  3.  
  4. def main():
  5.    
  6.     #npa
  7.     npa = {}
  8.     with open('npa.csv') as f:
  9.         for line in f:
  10.             current_line = line.split(',')
  11.             current_line[1] = current_line[1].strip()
  12.             if current_line[0] == "NPA":
  13.                 print "Skipping title"
  14.             elif current_line[1] in npa: # if key is in dictionary
  15.                 print("Two or locations mapped to a NPA")
  16.                 current_line[0] = current_line[0] + ", " + npa.get(current_line[1])
  17.                 npa.update({current_line[0] : current_line[1]}) # add new NPA with location
  18.             else:
  19.                 npa.update({current_line[0] : current_line[1]}) # add new Location and NPA to npa dictionary
  20.  
  21.     #sample_date
  22.     sample = {}
  23.     with open('sample_data.csv') as f:
  24.         for line in f:
  25.             current_line = line.split(',')
  26.             current_line[2] = int(current_line[2].strip())
  27.             if current_line[0][1:4] in sample: # if key is in dictionary
  28.                 current_line[2] = current_line[2] + int(sample.get(current_line[0][1:4])) # add the duration onto the previous entry
  29.                 sample.update({current_line[0][1:4] : current_line[2]}) # add new NPA with duration
  30.             else:
  31.                 sample.update({current_line[0][1:4] : current_line[2]}) # add new NPA and duration to dictionary
  32.  
  33.     #joined_data
  34.     joined = {}
  35.     sortnpa = OrderedDict(sorted(npa.items(), key=lambda t: t[0]))
  36.     sortsample = OrderedDict(sorted(sample.items(), key=lambda t: t[0]))
  37.     for key, value in sortnpa.items():
  38.         if value in joined:
  39.             if type(sortsample.get(key)) is int and type(joined.get(value)) is int:
  40.                 new = sortsample.get(key) + int(joined.get(value)) # add extra minutes if in dictionary
  41.                 joined.update({value : new}) # add new entry
  42.         else:
  43.             joined.update({value : sortsample.get(key)}) # add new entry
  44.  
  45.     #csv
  46.     with open('joined_data.csv', 'wb') as csv_file:
  47.         writer = csv.writer(csv_file)
  48.         writer.writerow(["Location", "Minutes"])
  49.         for key, value in joined.items():
  50.             writer.writerow([key, value]) # write location and duration in minutes
  51.  
  52. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement