Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from pyspark.sql.functions import col, desc
- stateByZhvi = home.select('State','Zhvi').groupBy((col("State"))).avg("Zhvi").show()
- +-----+------------------+
- |State| avg(Zhvi)|
- +-----+------------------+
- | AZ|246687.01298701297|
- | SC|143188.94736842104|
- | LA|159991.74311926606|
- | MN|236449.40239043825|
- | NJ| 367156.5637065637|
- | DC| 586109.5238095238|
- | OR| 306646.3768115942|
- | VA| 282764.4986449864|
- home.createOrReplaceTempView("home")
- spark.sql("select State, round(avg(Zhvi)) as avg_Zhvi from home group by State order by 2 desc").show()
- // input dataframe
- +-----+------------------+
- |State| avg|
- +-----+------------------+
- | AZ|246687.01298701297|
- | SC|143188.94736842104|
- | LA|159991.74311926606|
- +-----+------------------+
- df.sort(desc("avg")).show()
- // output dataframe, sorted by avg in descending order
- +-----+------------------+
- |State| avg|
- +-----+------------------+
- | AZ|246687.01298701297|
- | LA|159991.74311926606|
- | SC|143188.94736842104|
- +-----+------------------+
Add Comment
Please, Sign In to add comment