Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import org.apache.spark.sql.types._
import org.apache.spark.sql.Row

// Schema for the GTFS calendar_dates feed: one row per (service_id, date)
// service exception. exception_type appears to be the GTFS code (1 = service
// added, 2 = service removed) -- TODO confirm against the source feed.
val schema = StructType(Seq(
  StructField("service_id", StringType, true),
  StructField("date", StringType, true),
  StructField("exception_type", IntegerType, true)))

// Parse the raw CSV from HDFS into Rows.
// - split(",", -1) keeps trailing empty fields; the default split(",")
//   silently drops them, which turns a short/empty trailing column into an
//   ArrayIndexOutOfBoundsException at fields(2).
// - Blank lines and a possible "service_id,..." header row are filtered out
//   so fields(2).toInt cannot throw NumberFormatException on non-numeric text.
val cd = sc.textFile("hdfs://node1:9000/user/folakemi/stm/gtfs/staging/calendar_dates/calendar_dates_3.txt")
  .filter(line => line.nonEmpty && !line.startsWith("service_id"))
  .map { line =>
    val fields = line.split(",", -1)
    Row(fields(0), fields(1), fields(2).trim.toInt)
  }

// Sanity-check a few parsed rows on the driver.
cd.take(5).foreach(print)

// Promote the RDD[Row] to a DataFrame with the explicit schema above.
val cdDF = spark.createDataFrame(cd, schema)
cdDF.printSchema()
cdDF.show()
/****************** OUTPUT ***********************
+-----------------+--------+--------------+
|       service_id|    date|exception_type|
+-----------------+--------+--------------+
|18N-H50N000S-86-S|20181225|             2|
|18N-H50N000S-86-S|20181226|             2|
|18N-H50N000S-86-S|20190101|             2|
|18N-H50N000S-86-S|20190102|             2|
|18N-H54N000S-81-S|20181225|             2|
|18N-H54N000S-81-S|20181226|             2|
|18N-H54N000S-81-S|20190101|             2|
|18N-H54N000S-81-S|20190102|             2|
|18N-H55N000S-84-S|20181225|             2|
|18N-H55N000S-84-S|20181226|             2|
|18N-H55N000S-84-S|20190101|             2|
|18N-H55N000S-84-S|20190102|             2|
|18N-H56N000S-84-S|20181225|             2|
|18N-H56N000S-84-S|20181226|             2|
|18N-H56N000S-84-S|20190101|             2|
|18N-H56N000S-84-S|20190102|             2|
|18N-H57N000S-82-S|20181225|             2|
|18N-H57N000S-82-S|20181226|             2|
|18N-H57N000S-82-S|20190101|             2|
|18N-H57N000S-82-S|20190102|             2|
+-----------------+--------+--------------+
only showing top 20 rows

import org.apache.spark.sql.types._
import org.apache.spark.sql.Row
schema: org.apache.spark.sql.types.StructType = StructType(StructField(service_id,StringType,true), StructField(date,StringType,true), StructField(exception_type,IntegerType,true))
cd: org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = MapPartitionsRDD[55] at map at <console>:33
cdDF: org.apache.spark.sql.DataFrame = [service_id: string, date: string ... 1 more field]

[18N-H50N000S-86-S,20181225,2][18N-H50N000S-86-S,20181226,2][18N-H50N000S-86-S,20190101,2][18N-H50N000S-86-S,20190102,2][18N-H54N000S-81-S,20181225,2]
root
 |-- service_id: string (nullable = true)
 |-- date: string (nullable = true)
 |-- exception_type: integer (nullable = true)
************************************************/
/*********************************** Spark SQL ******************************/
// Register the DataFrame as a temp view so it can be queried with SQL from
// this Spark session (including Zeppelin %sql paragraphs).
cdDF.createOrReplaceTempView("calendar_dates_3")
/*****************************************************************************/

/********************* Spark Chart ********************************
 * The query below was run as a separate Zeppelin %sql paragraph to
 * render the chart; %sql is notebook syntax, not Scala, so it is
 * preserved here as a comment. (The original banner was missing its
 * opening delimiter, which made everything after it unparseable.)
 *
 *   %sql
 *   select * from calendar_dates_3
 ******************************************************************/
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement