Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from sqlalchemy import create_engine
- import pandas as pd
- import numpy as np
- import functools
- orig_df = None
- def get_cube(*fields):
- global orig_df
- # fields that we use in SQL group by rollup
- all_fields = ['transit_time', 'rounded_area', 'epoch', 'year', 'city_id']
- if set(fields) - set(all_fields):
- raise ValueError('fields %s are not in the columns' % ', '.join(set(fields) - set(all_fields)))
- # we store the original dataframe to get new groups again
- # this is purely procedural and stored in global space, as I have the same cube in the entire project
- if orig_df is None:
- engine = create_engine('postgresql://localhost:5432/alcohol')
- orig_df = pd.read_sql_query('select * from "cube_stat"', engine)
- df = orig_df
- # in cube, fields that we omit have to be null
- for f in (set(all_fields) - set(fields)):
- df = df.loc[pd.isnull(df[f])]
- # fields that we iterate on must be not null
- for f in fields:
- df = df[df[f].notnull()]
- return df
- # same but a standard iterator over cities to build charts
- def iter_cities(*fields):
- fields = set(fields) | set(['city_id'])
- cube = get_cube(*list(fields))
- for city_name, group in cube.groupby('name'):
- if city_name is None:
- continue
- df1 = pd.DataFrame(group)
- if len(df1) == 0:
- continue
- yield city_name, df1
Advertisement
Add Comment
Please, Sign In to add comment