Advertisement
Not a member of Pastebin yet?
Sign up —
it unlocks many cool features!
- import java.sql.Timestamp
- import java.time.{LocalDateTime, ZoneId, ZonedDateTime}
- import com.cloudera.sparkts._
- import org.apache.spark.sql.SparkSession
- import org.apache.spark.sql.functions.udf
- import org.apache.spark.sql.functions._
- /**
- * Created by josep2 on 12/5/16.
- */
/** One weekly sales observation read from the raw CSV input.
  *
  * @param id             identifier of the series this observation belongs to
  * @param week_beginning start of the week as a "yyyy-MM-dd" string
  *                       (converted to a Timestamp downstream)
  * @param sales          sales figure recorded for that week
  */
final case class TimeSeries(id: String, week_beginning: String, sales: Double)
/** Loads weekly sales observations from ./data.txt, aligns them on a uniform
  * weekly DateTimeIndex, and builds a spark-ts TimeSeriesRDD.
  *
  * Uses an explicit main method instead of the App trait (App's delayed
  * initialization has well-known ordering pitfalls), and guarantees the
  * Spark session is shut down even if loading fails.
  */
object TimeSeriesBlog {

  def main(args: Array[String]): Unit = {
    val sparkSession = SparkSession.builder
      .master("local[4]")
      .appName("Panel Engine")
      .config("spark.driver.memory", "1g")
      .config("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .getOrCreate()

    import sparkSession.implicits._

    try {
      // Parses a "yyyy-MM-dd" string into a Timestamp at local midnight.
      val makeDate = udf((s: String) =>
        Timestamp.from(
          ZonedDateTime
            .of(LocalDateTime.parse(s + "T00:00:00"), ZoneId.systemDefault())
            .toInstant))

      // Single immutable pipeline: parse CSV rows (id,week_beginning,sales)
      // and convert the date column, instead of reassigning through a var.
      val timeSeriesData = sparkSession.sparkContext.textFile("./data.txt")
        .map(_.split(","))
        .map(row => TimeSeries(row(0), row(1), row(2).toDouble))
        .toDF()
        .withColumn("week_beginning", makeDate(col("week_beginning")))

      val zone = ZoneId.systemDefault()
      // Uniform weekly index spanning the observation window (every 7 days).
      val dtIndex = DateTimeIndex.uniformFromInterval(
        ZonedDateTime.of(LocalDateTime.parse("2016-01-04T00:00:00"), zone),
        ZonedDateTime.of(LocalDateTime.parse("2016-04-04T00:00:00"), zone),
        new DayFrequency(7))

      // Pivot observations into one time series per id, aligned on dtIndex.
      val dataRdd = TimeSeriesRDD.timeSeriesRDDFromObservations(
        dtIndex, timeSeriesData, "week_beginning", "id", "sales")
    } finally {
      // Stop the whole session (which also stops the SparkContext), and do it
      // in finally so a failure while loading data still releases resources.
      sparkSession.stop()
    }
  }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement