Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Load the address file. The raw file mixes clinic names and address parts
# across columns, so the names below are only nominal labels until the
# later clean-up steps have run.
clinic_col = [
    "PERIOD",
    "PRACTICE",
    "CLINIC NAME ONE",
    "CLINIC NAME TWO",
    "ROAD NAME",
    "TOWN",
    "COUNTY",
    "UNKNOWN",
]
clinic = pd.read_csv(
    "uk_drug_prescription/T201611ADDR+BNFT.CSV",
    names=clinic_col,
)
# Remove whitespace from every free-text column (everything except PERIOD
# and PRACTICE). `rm_ws` is defined elsewhere in the file and is applied to
# each cell value in turn.
rm_ws_col = [
    "CLINIC NAME ONE",
    "CLINIC NAME TWO",
    "ROAD NAME",
    "TOWN",
    "COUNTY",
    "UNKNOWN",
]
for col in rm_ws_col:
    clinic[col] = [rm_ws(cell) for cell in clinic[col]]
# Arrange the clinic names and road names into correct columns.
# Rows whose ROAD NAME is empty get their CLINIC NAME TWO and ROAD NAME
# values exchanged (presumably the road was recorded one column too early
# in those rows -- confirm against the raw data).
#
# The swap is computed on whole columns and assigned back to `clinic`
# explicitly. Writing through Series extracted from the frame
# (`a.iloc[k] = ...`) only reaches `clinic` when pandas happens to hand out
# a view rather than a copy, and never does so under Copy-on-Write.
road_missing = clinic["ROAD NAME"] == ""
name_two = clinic["CLINIC NAME TWO"]
road_name = clinic["ROAD NAME"]

# Build both results before assigning either, so the second column is
# computed from the original values rather than already-swapped ones.
swapped_name_two = name_two.mask(road_missing, road_name)
swapped_road_name = road_name.mask(road_missing, name_two)

clinic["CLINIC NAME TWO"] = swapped_name_two
clinic["ROAD NAME"] = swapped_road_name
# Remove all dashes in TOWN and COUNTY column
def remove_dashes(row, column):
    """Return ``row[column]`` with every dash replaced by a space.

    Args:
        row: Record indexable by column name (for example a DataFrame row
            or a dict).
        column: Name of the field to clean.

    Returns:
        The field's text with each ``-`` turned into a single space.
        Non-string values (such as NaN or None for a missing field) are
        returned unchanged instead of raising.
    """
    value = row[column]
    if not isinstance(value, str):
        # Missing cells are not text; leave them as they are.
        return value
    # A literal one-character substitution needs no regular expression.
    return value.replace("-", " ")
# Replace dashes with spaces in the TOWN and COUNTY columns.
# This works on each column as a whole rather than calling a function once
# per DataFrame row, which would build a Series for every row just to touch
# a single field. `regex=False` makes "-" a plain literal, and missing
# values are left as they are. Assumes both columns hold text.
for dashed_col in ("TOWN", "COUNTY"):
    clinic[dashed_col] = clinic[dashed_col].str.replace("-", " ", regex=False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement