Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Flight-delay exploration: build per-city-pair aggregates, join them, and
# plot the joined result as a repeated scatter matrix.
# NOTE(review): `t` and `alt` are not defined in this snippet -- presumably an
# ibis table expression and the altair module; confirm against the full file.

# Named aggregates and key columns reused by the queries below.
ad = t.arrdelay.mean().name('ad')  # mean of arrdelay, exposed as 'ad'
dd = t.depdelay.mean().name('dd')  # mean of depdelay, exposed as 'dd'
flt_bet = t.count().name('fbp')    # row count, exposed as 'fbp'
origin = t.origin_city.name('origin')
dest = t.dest_city.name('dest')

# Row count per (origin, dest) pair.
fbp_expr = t.group_by([origin, dest]).aggregate(flt_bet)
# Debug aid: print(fbp_expr.compile())

# Mean arrival/departure delay per city pair, keeping only the groups where
# both means are non-null and strictly greater than 10.
expr = (
    t.group_by(['origin_city', 'dest_city'])
    .having([ad.notnull(), dd.notnull(), ad > 10, dd > 10])
    .aggregate([ad, dd])
)
# Debug aid: print(expr.compile())

# NOTE(review): view() presumably yields a separate reference to the counts
# expression so it can be joined against `expr` -- confirm.
fc = fbp_expr.view()

# Join: city pairs with more than 5000 rows (fbp > 5000) matched to their
# mean-delay row; keep every column of `expr` plus the count, order by
# ('ad', False) then ('dd', False), and keep the first 100 rows.
# NOTE(review): the False flag presumably requests descending order -- confirm.
join_expr = (
    fc[fc.fbp > 5000]
    .inner_join(
        expr,
        [fc.origin == expr.origin_city, fc.dest == expr.dest_city],
    )[expr, fc.fbp]
    .sort_by([('ad', False), ('dd', False)])
    .limit(100)
)
# Debug aid: print(join_expr.compile())

# 3x3 repeated circle chart over dd / ad / fbp; each cell is 150x150 and the
# points are coloured by 'fbp' encoded as a nominal field.
delay_pairs_chart = (
    alt.Chart(join_expr)
    .mark_circle()
    .encode(
        alt.X(alt.repeat("column"), type='quantitative'),
        alt.Y(alt.repeat("row"), type='quantitative'),
        color='fbp:N',
    )
    .properties(width=150, height=150)
    .repeat(
        row=['dd', 'ad', 'fbp'],
        column=['dd', 'ad', 'fbp'],
    )
    .interactive()
)
Add Comment
Please, Sign In to add comment