import org.apache.spark.sql.types._
import org.apache.spark.sql.Row

// Schema for the GTFS calendar_dates file: service_id, date, exception_type.
val schema = StructType(Seq(
  StructField("service_id", StringType, true),
  StructField("date", StringType, true),
  StructField("exception_type", IntegerType, true)))

// Read the raw file from HDFS and turn each comma-separated line into a Row.
val cd = sc.textFile("hdfs://node1:9000/user/folakemi/stm/gtfs/staging/calendar_dates/calendar_dates_3.txt").map(c => {
  val fields = c.split(",")
  Row(fields(0), fields(1), fields(2).toInt)
})
cd.take(5).foreach(print)

// Apply the schema to the Row RDD to get a typed DataFrame.
val cdDF = spark.createDataFrame(cd, schema)
cdDF.printSchema()
cdDF.show()

/******************OUTPUT***********************/
import org.apache.spark.sql.types._
import org.apache.spark.sql.Row
schema: org.apache.spark.sql.types.StructType = StructType(StructField(service_id,StringType,true), StructField(date,StringType,true), StructField(exception_type,IntegerType,true))
cd: org.apache.spark.rdd.RDD[org.apache.spark.sql.Row] = MapPartitionsRDD[55] at map at <console>:33
[18N-H50N000S-86-S,20181225,2][18N-H50N000S-86-S,20181226,2][18N-H50N000S-86-S,20190101,2][18N-H50N000S-86-S,20190102,2][18N-H54N000S-81-S,20181225,2]
cdDF: org.apache.spark.sql.DataFrame = [service_id: string, date: string ... 1 more field]
root
 |-- service_id: string (nullable = true)
 |-- date: string (nullable = true)
 |-- exception_type: integer (nullable = true)

+-----------------+--------+--------------+
|       service_id|    date|exception_type|
+-----------------+--------+--------------+
|18N-H50N000S-86-S|20181225|             2|
|18N-H50N000S-86-S|20181226|             2|
|18N-H50N000S-86-S|20190101|             2|
|18N-H50N000S-86-S|20190102|             2|
|18N-H54N000S-81-S|20181225|             2|
|18N-H54N000S-81-S|20181226|             2|
|18N-H54N000S-81-S|20190101|             2|
|18N-H54N000S-81-S|20190102|             2|
|18N-H55N000S-84-S|20181225|             2|
|18N-H55N000S-84-S|20181226|             2|
|18N-H55N000S-84-S|20190101|             2|
|18N-H55N000S-84-S|20190102|             2|
|18N-H56N000S-84-S|20181225|             2|
|18N-H56N000S-84-S|20181226|             2|
|18N-H56N000S-84-S|20190101|             2|
|18N-H56N000S-84-S|20190102|             2|
|18N-H57N000S-82-S|20181225|             2|
|18N-H57N000S-82-S|20181226|             2|
|18N-H57N000S-82-S|20190101|             2|
|18N-H57N000S-82-S|20190102|             2|
+-----------------+--------+--------------+
only showing top 20 rows
/************************************************/

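/* A minimal alternative sketch (not in the original paste): Spark's CSV reader
   can apply the same schema directly, skipping the manual split-and-Row step.
   Assumes the staging file has no header row, as the split code above implies;
   cdDF2 is a hypothetical name. */
val cdDF2 = spark.read
  .schema(schema)
  .option("header", "false")
  .csv("hdfs://node1:9000/user/folakemi/stm/gtfs/staging/calendar_dates/calendar_dates_3.txt")
cdDF2.show(5)
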
/***********************************Spark SQL******************************/
// Register the DataFrame as a temp view so it can be queried with SQL.
cdDF.createOrReplaceTempView("calendar_dates_3")
/*********************************************************************/
/*********************Spark Chart**********************************/
%sql
select * from calendar_dates_3
/**************************************************/
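
/* Optional follow-up sketch (an assumption, not in the original paste): the
   date column was declared as StringType, so a real DateType column would be
   needed for date arithmetic. Assumes Spark 2.2+ for to_date(col, fmt);
   cdTyped is a hypothetical name. */
import org.apache.spark.sql.functions.{col, to_date}
val cdTyped = cdDF.withColumn("date", to_date(col("date"), "yyyyMMdd"))
cdTyped.printSchema()  // date is now: date (nullable = true)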