Guest User

sql cube to pandas

a guest
Apr 25th, 2017
13
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.42 KB | None | 0 0
  1. from sqlalchemy import create_engine
  2. import pandas as pd
  3. import numpy as np
  4. import functools
  5.  
  6. orig_df = None
  7. def get_cube(*fields):
  8.     global orig_df
  9.     # fields that we use in SQL group by rollup
  10.     all_fields = ['transit_time', 'rounded_area', 'epoch', 'year', 'city_id']
  11.     if set(fields) - set(all_fields):
  12.         raise ValueError('fields %s are not in the columns' % ', '.join(set(fields) - set(all_fields)))
  13.  
  14.     # we store the original dataframe to get new groups again
  15.     # this is purely procedural and stored in global space, as I have the same cube in the entire project
  16.     if orig_df is None:
  17.         engine = create_engine('postgresql://localhost:5432/alcohol')
  18.         orig_df = pd.read_sql_query('select * from "cube_stat"', engine)
  19.  
  20.     df = orig_df
  21.     # in cube, fields that we omit have to be null
  22.     for f in (set(all_fields) - set(fields)):
  23.         df = df.loc[pd.isnull(df[f])]
  24.  
  25.     # fields that we iterate on must be not null
  26.     for f in fields:
  27.         df = df[df[f].notnull()]
  28.  
  29.     return df
  30.  
  31. # same but a standard iterator over cities to build charts
  32. def iter_cities(*fields):
  33.     fields = set(fields) | set(['city_id'])
  34.     cube = get_cube(*list(fields))
  35.  
  36.     for city_name, group in cube.groupby('name'):
  37.         if city_name is None:
  38.             continue
  39.  
  40.         df1 = pd.DataFrame(group)
  41.         if len(df1) == 0:
  42.             continue
  43.        
  44.         yield city_name, df1
Advertisement
Add Comment
Please, Sign In to add comment