Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from pyspark.sql.functions import col, desc
from pyspark.sql import Window
import pyspark.sql.functions as psf

# Rank destination airports by their total inbound passengers in 2008.
# NOTE(review): `df_renamed_dest` is not defined in this snippet; it is
# presumably built earlier and must expose the columns "flight_year",
# "destination_airport_name" and "passengers" -- confirm against the caller.

# Keep only the rows for flights in 2008.
df08 = df_renamed_dest.filter(col("flight_year") == 2008)

# Total inbound passengers per destination airport. groupBy().sum() names the
# aggregate column "sum(passengers)"; the window below refers to it by that
# auto-generated name.
ranked = df08.groupBy("destination_airport_name").sum("passengers")

# Order airports from most to fewest inbound passengers and attach a rank.
# dense_rank() gives tied airports the same rank and leaves no gaps after a
# tie. The window has no partitionBy(), so Spark evaluates it on a single
# partition; `ranked` holds one row per airport, which keeps that affordable.
windowA = Window.orderBy(psf.desc("sum(passengers)"))
dfr = ranked.withColumn("Rank", psf.dense_rank().over(windowA))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement