Advertisement
Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
// Zeppelin paragraph: rank a community's followers by vote score over the last 4 days.
// (Paste artifacts — leading "- " bullets — removed so this compiles as Scala.)
import odkl.analysis.spark.util.DateRange

// Date window covering the last 4 days up to and including today.
val dates = DateRange("4 days ago:today")

// Per-user metric counts, pivoted so each operation type becomes a column;
// SCORE is derived as VOTE minus MISTAKES, highest score first.
val data = dates.readParquet(sqlc, "/stats/metrics/rowan/")
  .groupBy("userId")
  .pivot("operation")
  .agg(sum("count"))
  .withColumn("SCORE", $"VOTE" - $"MISTAKES")
  .orderBy($"SCORE".desc)

// Members of the target community with status A, M, or "!" that are not deleted.
// NOTE(review): "deleted" is compared against the string "false" — presumably a
// string-typed column in this snapshot; confirm against the parquet schema.
val followers = sqlContext.read.parquet("hdfs://datalab-hadoop-dwh-stable/data/ok/CommunityRecords/v20191205")
  .filter($"community_id" === "52246588424410")
  .filter($"status".isin("A", "M", "!"))
  .filter($"deleted" === "false")
  .select($"customer_id" as "userId")

// Keep only scored users who are followers of the community.
val top_from_group = data.join(followers, Seq("userId"))

// Top 50 by score, excluding users with 300 or more mistakes; rendered via Zeppelin.
val top50 = top_from_group
  .select($"userId", $"MISTAKES", $"VOTE", $"SCORE")
  .filter($"MISTAKES" < 300)
  .orderBy($"SCORE".desc)
  .limit(50)
z show top50
Advertisement
Add Comment
Please sign in to add a comment
Advertisement