Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import google_maps as g
- import pandas as pd
- import re
- file = pd.ExcelFile('./polling_stations.xlsx')
- writer = pd.ExcelWriter('./polling_stations_cleaned.xlsx',engine='xlsxwriter')
- sheet_name = file.sheet_names[0]
- sheet = file.parse(sheet_name)
- sheet = sheet.drop(columns="Address of Polling Station (Hindi)")
- column_name = 'Locality'
- ward_lst = []
- ward_num = []
- for i in range(len(sheet[column_name])):
- temp = sheet[column_name][i]
- if re.match(r'PA|J',temp) is not None:
- ward_lst.append('PASCHIM VIHAR')
- ward_num.append(67)
- elif re.match(r'RANI|MULTAN|NEW|RISHI|SHAKUR',temp) is not None:
- ward_lst.append('RANI BAGH')
- ward_num.append(66)
- elif re.match(r'PE|SA',temp) is not None:
- ward_lst.append('SARASWATI VIHAR')
- ward_num.append(65)
- ward_lat = []
- ward_long = []
- column_name = 'Polling Area'
- booth_lat = []
- booth_long = []
- polling_addresses = []
- count = 1
- i = 0
- while(i<len(sheet[column_name])):
- count = 0
- temp = sheet[column_name][i]
- addr = re.split(r'[1-9] - ',temp)
- for item in addr:
- if item is not '':
- polling_addresses.append(item)
- count = count + 1
- i = i+count
- print(len(polling_addresses))
- for item in ward_lst:
- geo_data = g.get_geocoding(item)
- ward_lat.append(geo_data['latitude'])
- ward_long.append(geo_data['longitude'])
- for addr in polling_addresses:
- geo_data = g.get_geocoding(addr)
- booth_lat.append(geo_data['latitude'])
- booth_long.append(geo_data['longitude'])
- sheet['WardName'] = pd.Series(ward_lst)
- sheet['WardNumber'] = pd.Series(ward_num)
- sheet['WardLatitude'] = pd.Series(ward_lat)
- sheet['WardLongitude'] = pd.Series(ward_long)
- sheet['PollingBooth'] = pd.Series(polling_addresses)
- sheet['BoothLatitude'] = pd.Series(booth_lat)
- sheet['BoothLongitude'] = pd.Series(booth_long)
- sheet.to_excel(writer, sheet_name=sheet_name)
- writer.save()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement