Advertisement
Guest User

Untitled

a guest
Dec 6th, 2019
99
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.74 KB | None | 0 0
  1. import odkl.analysis.spark.util.DateRange
  2. // Look-back window for the metrics read. The "4 days ago:today" spec is parsed by DateRange
  3. // (presumably the last four days through today -- confirm against DateRange).
  4. val dates = DateRange("4 days ago:today")
  5.  
  6. // One row per userId, one column per distinct "operation" value, each holding sum("count").
  7. // pivot() yields null where a user has no rows for an operation. A null VOTE or MISTAKES would
  8. // make SCORE null and would make the later `MISTAKES < 300` filter drop the row, so users with
  9. // no recorded mistakes would disappear. Zero-fill both columns before deriving SCORE.
  10. val data = dates.readParquet(sqlc, "/stats/metrics/rowan/")
  11.   .groupBy("userId")
  12.   .pivot("operation")
  13.   .agg(sum("count"))
  14.   .na.fill(0, Seq("VOTE", "MISTAKES"))
  15.   .withColumn("SCORE", $"VOTE" - $"MISTAKES")
  16.   // NOTE(review): the join below does not depend on this ordering and the final display
  17.   // re-sorts; kept only so `data` itself stays sorted if it is inspected elsewhere.
  18.   .orderBy($"SCORE".desc)
  19.  
  20. // Members of community 52246588424410 with status A, M or "!" and deleted == "false",
  21. // exposed under the same key name ("userId") as `data` so the two can be joined.
  22. val followers = sqlContext.read
  23.   .parquet("hdfs://datalab-hadoop-dwh-stable/data/ok/CommunityRecords/v20191205")
  24.   .filter($"community_id" === "52246588424410")
  25.   .filter($"status".isin("A", "M", "!"))
  26.   .filter($"deleted" === "false")
  27.   .select($"customer_id" as "userId")
  28.  
  29. // Inner join: keep only scored users who are also members of the community.
  30. val top_from_group = data.join(followers, Seq("userId"))
  31.  
  32. // Show the 50 best-scoring members with fewer than 300 mistakes.
  33. z.show(
  34.   top_from_group
  35.     .select($"userId", $"MISTAKES", $"VOTE", $"SCORE")
  36.     .filter($"MISTAKES" < 300)
  37.     .orderBy($"SCORE".desc)
  38.     .limit(50)
  39. )
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement