Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import csv
import json
import os
import re
from typing import Optional
# Most recent vote count seen for each compound key. The Process* functions
# read and update it to compute deltas; the script entry point clears it
# before each directory is processed.
priorValues = dict()
def BuildCompoundKey(*keys: str) -> str:
    """Join the given parts into one lookup key, each part followed by '|'.

    ``BuildCompoundKey('a', 'b')`` gives ``'a|b|'``; calling it with no
    arguments gives the empty string.
    """
    return ''.join(f'{part}|' for part in keys)
def GetState(fileName: str) -> str:
    """Return the first two characters of the file's base name.

    Backslashes are treated as path separators and every '.' is removed
    before the last path component is taken, so './PA_x.json' yields 'PA'.
    Callers in this file use the result as a state abbreviation.
    """
    normalized = fileName.replace('\\', '/').replace('.', '')
    baseName = normalized.split('/')[-1]
    return baseName[:2]
def GetTimeStamp(fileName: str) -> Optional[str]:
    """Extract the last snapshot timestamp embedded in a file name.

    The name is searched for ``YYYY-MM-DDTHH_MM_SS.mmm``. The last match is
    returned with every ``_`` replaced by ``:`` and the ``T`` replaced by a
    space, e.g. ``'2020-11-04 12:34:56.789'``.

    Returns:
        The reformatted timestamp, or None when the name contains no
        timestamp in that format.
    """
    # Raw string with an escaped dot: the millisecond separator has to be a
    # literal '.', not "any character".
    timePattern = r'[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}_[0-9]{2}_[0-9]{2}\.[0-9]{3}'
    matches = re.findall(timePattern, fileName)
    if not matches:
        return None
    return matches[-1].replace('_', ':').replace('T', ' ')
def ProcessCountyData(fileName: str) -> None:
    """Append county-level vote rows from one JSON snapshot to a CSV file.

    The output file is ``'<XX>CountyData.csv'`` in the current working
    directory, where ``<XX>`` is ``GetState(fileName)``. A header row is
    written only when that file does not exist yet. Files without a
    ``'county_by_vote_type'`` section produce no output.

    For every county/vote-type/candidate combination one row is written with
    the current vote count and its delta against the value last recorded in
    the module-level ``priorValues`` (0 when there is none); ``priorValues``
    is then updated.

    Args:
        fileName: Path of the JSON snapshot to read.
    """
    timeStamp = GetTimeStamp(fileName)
    if timeStamp is None:
        # No timestamp in the name: label the rows with the file name itself.
        timeStamp = fileName
    outputFileName = f'{GetState(fileName)}CountyData.csv'

    with open(fileName, 'r') as jsonFile:
        jsonData = json.load(jsonFile)
    if 'county_by_vote_type' not in jsonData:
        return

    # Decide about the header before opening: mode 'a' creates the file.
    addHeader = not os.path.exists(outputFileName)
    with open(outputFileName, 'a') as outFile:
        # QUOTE_ALL keeps every field in double quotes as before, and
        # additionally escapes quotes that occur inside a value.
        writer = csv.writer(outFile, quoting=csv.QUOTE_ALL, lineterminator='\n')
        if addHeader:
            writer.writerow(['timeStamp', 'countyName', 'voteType', 'candidate', 'votes', 'delta'])
        for county in jsonData['county_by_vote_type']:
            countyName = county['locality_name']
            voteType = county['vote_type']
            for candidate, votes in county['results'].items():
                key = BuildCompoundKey(countyName, voteType, candidate)
                delta = votes - priorValues.get(key, 0)
                priorValues[key] = votes
                # str() keeps the textual form identical to f-string output
                # (csv would otherwise write None as an empty field).
                writer.writerow(map(str, (timeStamp, countyName, voteType, candidate, votes, delta)))
def ProcessPaPrecinctData(fileName: str) -> None:
    """Append precinct-level rows from a Pennsylvania JSON snapshot to a CSV file.

    Pennsylvania json files have two precinct sections
    ('precinct_totals' and 'precinct_by_vote_type'), and Chester + Philly
    counties have the vote_type field missing; such entries are written with
    vote type 'unknown'.

    The output file is ``'<XX>PrecinctData.csv'`` in the current working
    directory, where ``<XX>`` is ``GetState(fileName)``. A header row is
    written only when that file does not exist yet. Nothing is written
    unless both sections are present in the JSON data.

    Each row carries the current vote count and its delta against the value
    last recorded in the module-level ``priorValues`` (0 when there is none);
    ``priorValues`` is then updated.

    Args:
        fileName: Path of the JSON snapshot to read.
    """
    precinctSections = ['precinct_totals', 'precinct_by_vote_type']
    timeStamp = GetTimeStamp(fileName)
    if timeStamp is None:
        # No timestamp in the name: label the rows with the file name itself.
        timeStamp = fileName
    outputFileName = f'{GetState(fileName)}PrecinctData.csv'

    with open(fileName, 'r') as jsonFile:
        jsonData = json.load(jsonFile)
    if not all(section in jsonData for section in precinctSections):
        return

    # Decide about the header before opening: mode 'a' creates the file.
    addHeader = not os.path.exists(outputFileName)
    with open(outputFileName, 'a') as outFile:
        # QUOTE_ALL keeps every field in double quotes as before, and
        # additionally escapes quotes that occur inside a value.
        writer = csv.writer(outFile, quoting=csv.QUOTE_ALL, lineterminator='\n')
        if addHeader:
            writer.writerow(['timeStamp', 'precinctId', 'countyName', 'voteType', 'candidate', 'votes', 'delta'])
        for precinctSection in precinctSections:
            for precinct in jsonData[precinctSection]:
                precinctId = precinct['precinct_id']
                countyName = precinct['locality_name']
                voteType = precinct.get('vote_type', 'unknown')
                # NOTE(review): the key does not include the section name, so
                # a precinct lacking 'vote_type' in both sections shares one
                # priorValues entry across them - confirm this is intended.
                for candidate, votes in precinct['results'].items():
                    key = BuildCompoundKey(precinctId, countyName, voteType, candidate)
                    delta = votes - priorValues.get(key, 0)
                    priorValues[key] = votes
                    # str() keeps the textual form identical to f-string
                    # output (csv would otherwise write None as empty).
                    writer.writerow(map(str, (timeStamp, precinctId, countyName, voteType, candidate, votes, delta)))
def ProcessPrecinctData(fileName: str) -> None:
    """Append precinct-level rows from one JSON snapshot to a CSV file.

    The output file is ``'<XX>PrecinctData.csv'`` in the current working
    directory, where ``<XX>`` is ``GetState(fileName)``. A header row is
    written only when that file does not exist yet. Files without a
    ``'precincts'`` section produce no output.

    Each row carries the current vote count and its delta against the value
    last recorded in the module-level ``priorValues`` (0 when there is none);
    ``priorValues`` is then updated.

    Args:
        fileName: Path of the JSON snapshot to read.
    """
    timeStamp = GetTimeStamp(fileName)
    if timeStamp is None:
        # No timestamp in the name: label the rows with the file name itself.
        timeStamp = fileName
    outputFileName = f'{GetState(fileName)}PrecinctData.csv'

    with open(fileName, 'r') as jsonFile:
        jsonData = json.load(jsonFile)
    if 'precincts' not in jsonData:
        return

    # Decide about the header before opening: mode 'a' creates the file.
    addHeader = not os.path.exists(outputFileName)
    with open(outputFileName, 'a') as outFile:
        # QUOTE_ALL keeps every field in double quotes as before, and
        # additionally escapes quotes that occur inside a value.
        writer = csv.writer(outFile, quoting=csv.QUOTE_ALL, lineterminator='\n')
        if addHeader:
            writer.writerow(['timeStamp', 'precinctId', 'countyName', 'voteType', 'candidate', 'votes', 'delta'])
        for precinct in jsonData['precincts']:
            precinctId = precinct['precinct_id']
            countyName = precinct['locality_name']
            # shouldn't happen outside of PA...but why not
            # NOTE(review): capitalised here, while ProcessPaPrecinctData
            # writes lowercase 'unknown'; kept as-is to preserve output.
            voteType = precinct.get('vote_type', 'Unknown')
            for candidate, votes in precinct['results'].items():
                key = BuildCompoundKey(precinctId, countyName, voteType, candidate)
                delta = votes - priorValues.get(key, 0)
                priorValues[key] = votes
                # str() keeps the textual form identical to f-string output
                # (csv would otherwise write None as an empty field).
                writer.writerow(map(str, (timeStamp, precinctId, countyName, voteType, candidate, votes, delta)))
def ParseFile(fileName: str) -> None:
    """Announce the file on stdout and route it to the matching processors.

    Files whose ``GetState`` result is 'PA' go through the county processor
    and the Pennsylvania precinct processor; every other file goes through
    the generic precinct processor.
    """
    print(f'Processing {fileName}')
    # TODO: Cleanup this mess
    if GetState(fileName) != 'PA':
        ProcessPrecinctData(fileName)
        return
    ProcessCountyData(fileName)
    ProcessPaPrecinctData(fileName)
if __name__ == '__main__':
    # Walk the current directory tree and feed every .json file, in sorted
    # name order within each directory, to ParseFile.
    for directoryPath, subdirectoryList, fileList in os.walk(os.curdir):
        # Start each directory with no remembered vote counts.
        priorValues.clear()
        for fileNameSingle in sorted(fileList):
            if not fileNameSingle.endswith('.json'):
                continue
            ParseFile(os.path.join(directoryPath, fileNameSingle))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement