Advertisement
Guest User

ConvertToCSV

a guest
Nov 21st, 2020
253
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 7.34 KB | None | 0 0
import csv
import json
import os
import re
  4.  
# Last vote count seen for each compound key (built by BuildCompoundKey).
# The Process* functions read it to compute the "delta" column and then
# overwrite it; the __main__ driver clears it at the start of each directory.
priorValues = {}
  6.  
  7.  
  8. def BuildCompoundKey(*keys: str) -> str:
  9.     compoundKey = ''
  10.     for key in keys:
  11.         compoundKey += f'{key}|'
  12.     return compoundKey
  13.  
  14.  
  15. def GetState(fileName: str) -> str:
  16.     fileName = fileName.replace('\\', '/')
  17.     fileName = fileName.replace('.', '')
  18.     fileParts = fileName.split('/')
  19.     fileParts.reverse()
  20.     state = fileParts[0]
  21.     return state[:2]
  22.  
  23.  
  24. def GetTimeStamp(fileName: str) -> str:
  25.     timeStamp = None
  26.     timePattern = '[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}_[0-9]{2}_[0-9]{2}.[0-9]{3}'
  27.     regex = re.compile(timePattern)
  28.     timeStampArr = regex.findall(fileName)
  29.     if timeStampArr:
  30.         timeStamp = timeStampArr[len(timeStampArr)-1].replace('_', ':').replace('T', ' ')
  31.     return timeStamp
  32.  
  33.  
  34. def ProcessCountyData(fileName: str) -> None:
  35.     timeStamp = GetTimeStamp(fileName)
  36.     if timeStamp == None:
  37.         timeStamp = fileName
  38.     stateAbbrev = GetState(fileName)
  39.     outputFileName = f'{stateAbbrev}CountyData.csv'
  40.     with open(fileName, 'r') as jsonFile:
  41.         jsonData = json.load(jsonFile)
  42.         if 'county_by_vote_type' in jsonData:
  43.             countyData = jsonData['county_by_vote_type']
  44.             addHeader = False
  45.             if not os.path.exists(outputFileName):
  46.                 addHeader = True
  47.             with open(outputFileName, 'a') as outFile:
  48.                 if addHeader:
  49.                     line = '"timeStamp","countyName","voteType","candidate","votes","delta"\n'
  50.                     outFile.write(line)
  51.                 for county in countyData:
  52.                     countyName = county['locality_name']
  53.                     voteType = county['vote_type']
  54.                     candidateResults = county['results']
  55.                     for candidate in county['results']:
  56.                         key = BuildCompoundKey(countyName, voteType, candidate)
  57.                         priorValue = 0
  58.                         if key in priorValues:
  59.                             priorValue = priorValues[key]
  60.                         votes = candidateResults[candidate]
  61.                         delta = votes - priorValue
  62.                         # print(f'{timeStamp} | {countyName} | {voteType} | {candidate} | {votes} | {delta}')
  63.                         priorValues[key] = votes
  64.                         line = f'"{timeStamp}","{countyName}","{voteType}","{candidate}","{votes}","{delta}"' + '\n'
  65.                         outFile.write(line)
  66.  
  67.  
  68. def ProcessPaPrecinctData(fileName: str) -> None:
  69.     """ Pennsylvania json files have two precinct sections
  70.        and Chester + Philly counties have vote_type field missing
  71.    """
  72.     precinctSections = ['precinct_totals','precinct_by_vote_type']
  73.     timeStamp = GetTimeStamp(fileName)
  74.     if timeStamp == None:
  75.         timeStamp = fileName
  76.     stateAbbrev = GetState(fileName)
  77.     outputFileName = f'{stateAbbrev}PrecinctData.csv'
  78.     with open(fileName, 'r') as jsonFile:
  79.         jsonData = json.load(jsonFile)
  80.         if 'precinct_totals' in jsonData and 'precinct_by_vote_type' in jsonData:
  81.             addHeader = False
  82.             if not os.path.exists(outputFileName):
  83.                 addHeader = True
  84.             with open(outputFileName, 'a') as outFile:
  85.                 if addHeader:
  86.                     line = '"timeStamp","precinctId","countyName","voteType","candidate","votes","delta"\n'
  87.                     outFile.write(line)
  88.                 for precinctSection in precinctSections:
  89.                     precinctData = jsonData[precinctSection]
  90.                     for precinct in precinctData:
  91.                         precinctId = precinct['precinct_id']
  92.                         countyName = precinct['locality_name']
  93.                         if 'vote_type' in precinct:
  94.                             voteType = precinct['vote_type']
  95.                         else:
  96.                             voteType = 'unknown'
  97.                         candidateResults = precinct['results']
  98.                         for candidate in precinct['results']:
  99.                             key = BuildCompoundKey(precinctId, countyName, voteType, candidate)
  100.                             priorValue = 0
  101.                             if key in priorValues:
  102.                                 priorValue = priorValues[key]
  103.                             votes = candidateResults[candidate]
  104.                             delta = votes - priorValue
  105.                             # print(f'{timeStamp} | {precinctId} | {countyName} | {voteType} | {candidate} | {votes} | {delta}')
  106.                             priorValues[key] = votes
  107.                             line = f'"{timeStamp}","{precinctId}","{countyName}","{voteType}","{candidate}","{votes}","{delta}"' + '\n'
  108.                             outFile.write(line)
  109.  
  110.  
  111. def ProcessPrecinctData(fileName: str) -> None:
  112.     timeStamp = GetTimeStamp(fileName)
  113.     if timeStamp == None:
  114.         timeStamp = fileName
  115.     stateAbbrev = GetState(fileName)
  116.     outputFileName = f'{stateAbbrev}PrecinctData.csv'
  117.     with open(fileName, 'r') as jsonFile:
  118.         jsonData = json.load(jsonFile)
  119.         precinctSectionName = None
  120.         if 'precincts' in jsonData:
  121.             precinctData = jsonData['precincts']
  122.             addHeader = False
  123.             if not os.path.exists(outputFileName):
  124.                 addHeader = True
  125.             with open(outputFileName, 'a') as outFile:
  126.                 if addHeader:
  127.                     line = '"timeStamp","precinctId","countyName","voteType","candidate","votes","delta"\n'
  128.                     outFile.write(line)
  129.                 for precinct in precinctData:
  130.                     precinctId = precinct['precinct_id']
  131.                     countyName = precinct['locality_name']
  132.                     if 'vote_type' in precinct:
  133.                         voteType = precinct['vote_type']
  134.                     else:
  135.                         # shouldn't happen outside of PA...but why not
  136.                         voteType = 'Unknown'
  137.                     candidateResults = precinct['results']
  138.                     for candidate in precinct['results']:
  139.                         key = BuildCompoundKey(precinctId, countyName, voteType, candidate)
  140.                         priorValue = 0
  141.                         if key in priorValues:
  142.                             priorValue = priorValues[key]
  143.                         votes = candidateResults[candidate]
  144.                         delta = votes - priorValue
  145.                         # print(f'{timeStamp} | {precinctId} | {countyName} | {voteType} | {candidate} | {votes} | {delta}')
  146.                         priorValues[key] = votes
  147.                         line = f'"{timeStamp}","{precinctId}","{countyName}","{voteType}","{candidate}","{votes}","{delta}"' + '\n'
  148.                         outFile.write(line)
  149.  
  150.  
  151. def ParseFile(fileName: str) -> None:
  152.     print(f'Processing {fileName}')
  153.     stateAbbrev = GetState(fileName)
  154.     # TODO: Cleanup this mess
  155.     if stateAbbrev == 'PA':
  156.         ProcessCountyData(fileName)
  157.         ProcessPaPrecinctData(fileName)
  158.     else:
  159.         ProcessPrecinctData(fileName)
  160.  
  161.  
  162. if __name__ == '__main__':
  163.     for (directoryPath, subdirectoryList, fileList) in os.walk(os.curdir):
  164.         priorValues.clear()
  165.         fileList.sort()
  166.         for fileNameSingle in fileList:
  167.             if fileNameSingle.endswith('.json'):
  168.                 ParseFile(os.path.join(directoryPath, fileNameSingle))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement