Advertisement
Guest User

Untitled

a guest
Sep 11th, 2019
120
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Scala 0.69 KB | None | 0 0
  // Streaming reader over the per-day retail CSV files. `spark` and
  // `staticSchema` are not defined in this snippet and must already be in scope.
  val streamingDataFrame = spark.readStream
    .format("csv")
    .schema(staticSchema)
    .option("header", "true")
    .option("maxFilesPerTrigger", 1) // limit each trigger to a single new file
    .load("/data/retail-data/by-day/*.csv")
  7.  
  // Summed line cost (UnitPrice * Quantity) per customer, bucketed into
  // one-day windows over InvoiceDate.
  // NOTE(review): the name says "PerHour" but the window is "1 day"; the name is
  // left as-is because the streaming write that follows refers to it.
  val purchaseByCustomerPerHour = {
    // Keep only the columns needed for the aggregation, deriving the line cost.
    val lineCosts = streamingDataFrame.selectExpr(
      "CustomerId",
      "(UnitPrice * Quantity) as total_cost",
      "InvoiceDate")

    // One group per (customer, one-day InvoiceDate window).
    val byCustomerAndWindow =
      lineCosts.groupBy($"CustomerId", window($"InvoiceDate", "1 day"))

    byCustomerAndWindow.sum("total_cost")
  }
  16.  
  // Start the streaming query, sending the aggregated result to the memory sink.
  purchaseByCustomerPerHour.writeStream
    .queryName("customer_purchases") // name under which the in-memory table is exposed
    .outputMode("complete")          // complete = the whole result table is written out
    .format("memory")                // memory = results are stored as an in-memory table
    .start()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement