Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def explode_hstore(df, column):
    """Expand a column of PostgreSQL hstore text into one column per key.

    Each cell of ``df[column]`` is expected to hold hstore key/value pairs in
    their text form::

        "foo"=>"bar", "baz"=>"quux", ...

    Every distinct key becomes a new column, joined onto ``df`` by index.
    Rows whose cell lacks a given key (or is missing entirely) get NaN in
    that column. Pairs with an unquoted ``NULL`` value are not matched and
    therefore also yield NaN.

    Pairs are matched as whole quoted tokens rather than by splitting on
    ``', '``, so values may contain commas, spaces, punctuation, or escaped
    quotes/backslashes (``\\"`` and ``\\\\``), which are unescaped in the
    output. If a key occurs more than once in a cell, the first occurrence
    wins.

    Args:
        df: The source DataFrame. It is not modified.
        column: Name of the column in ``df`` holding the hstore text.

    Returns:
        A new DataFrame: ``df`` plus one column per hstore key. If no pairs
        are found at all, a copy of ``df`` is returned unchanged.

    Raises:
        ValueError: Raised by ``DataFrame.join`` if an hstore key has the
            same name as an existing column of ``df``.
    """
    # A quoted token: any run of non-quote, non-backslash characters or
    # backslash-escaped characters, wrapped in double quotes.
    quoted = r'"(?P<{name}>(?:[^"\\]|\\.)*)"'
    pair_pattern = (
        quoted.format(name="key") + r"\s*=>\s*" + quoted.format(name="val")
    )

    # One row per matched pair; the index gains a trailing 'match' level.
    extracted = df[column].str.extractall(pair_pattern)
    if extracted.empty:
        # Nothing to pivot; keep the "returns a new frame" contract.
        return df.copy()

    # Undo hstore's backslash escaping (\" -> ", \\ -> \).
    for name in ("key", "val"):
        extracted[name] = extracted[name].str.replace(
            r"\\(.)", r"\1", regex=True
        )

    # Drop the per-match counter so rows line up with df's own index again.
    extracted.index = extracted.index.droplevel(-1)

    # Turn the distinct keys into columns, one row per original index label.
    pivoted = extracted.pivot_table(
        values="val", index=extracted.index, columns="key", aggfunc="first"
    )

    return df.join(pivoted)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement