Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
# Names of the geography columns that can be derived from a block code.
# Each one is a leading prefix of the 15-character block_code string.
STATE_CODE = "state_code"
COUNTY_CODE = "county_code"
TRACT_CODE = "tract_code"
def convert_block_code_to_str(df):
    """
    Render the block_code column as zero-padded, 15-character strings.

    Each value is cast to ``str`` and then left-padded with ``'0'`` until it
    is 15 characters long; values that are already 15 characters or longer
    are not truncated.

    Params
        df(pandas.DataFrame): A Pandas DataFrame with a block_code column.

    Return:
        A new pandas.DataFrame whose block_code column holds the padded
        strings. The DataFrame passed in is not modified.
    """
    as_text = df.block_code.astype(str)
    zero_padded = as_text.str.zfill(15)
    return df.assign(block_code=zero_padded)
def add_geographies(df, geographies):
    """
    Add geography columns derived from prefixes of the block code.

    Each recognized geography becomes a new column holding the leading
    characters of block_code: 2 for state_code, 5 for county_code and 11 for
    tract_code. If the block_code column does not have object dtype, it is
    first converted with convert_block_code_to_str.

    Params
        df(pandas.DataFrame): A Pandas DataFrame with a block_code column.
        geographies(list or str): The geographies to add (STATE_CODE,
            COUNTY_CODE and/or TRACT_CODE). A single geography may also be
            given as a bare string.

    Return:
        A pandas.DataFrame with one column added per recognized geography.
        Unrecognized geographies are reported with a printed message and
        otherwise skipped.
    """
    # A bare string would otherwise be iterated character by character and
    # every letter reported as an unknown geography.
    if isinstance(geographies, str):
        geographies = [geographies]

    if df.block_code.dtype != np.dtype('O'):
        df = convert_block_code_to_str(df)

    # (column name, number of leading block_code characters) per geography.
    prefix_lengths = (
        (STATE_CODE, 2),
        (COUNTY_CODE, 5),
        (TRACT_CODE, 11),
    )

    for geography in geographies:
        # Equality scan rather than a dict lookup, so unhashable entries are
        # still reported as unrecognized instead of raising.
        match = next(
            (entry for entry in prefix_lengths if geography == entry[0]),
            None,
        )
        if match is None:
            print("Geography {} not recognized".format(geography))
            continue
        column, length = match
        df = df.assign(**{column: df.block_code.str[0:length]})
    return df
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement