Guest User

Untitled

a guest
Oct 23rd, 2017
93
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.47 KB | None | 0 0
  1. +----+---------+--------+
  2. |col1|     col2|    col3|
  3. +----+---------+--------+
  4. |   1|pi number|3.141592|
  5. |   2| e number| 2.71828|
  6. +----+---------+--------+
  7.  
  8. // Read a Spark show()-style table: '|' is the cell delimiter and '+' border rows are skipped as comments.
  9. val t = spark.read
  10. .option("header","true")
  11. .option("inferSchema","true")
  12. .option("delimiter","|")
  13. .option("comment","+")
  14. .csv("file:///temp/spark.out")
  15. // keep only the columns whose names do not start with "_c" (the original note calls these the not-null columns)
  16. val cols = t.columns.filterNot(c => c.startsWith("_c")).map(a => t(a))
  17. // strip all whitespace from the kept column names; "\\s+" is the regex \s+ (a bare "\s" is an invalid escape in a Scala string literal)
  18. val colsTrimmed = t.columns.filterNot(c => c.startsWith("_c")).map(c => c.replaceAll("\\s+",""))
  19. // rename the selected columns using 'colsTrimmed'
  20. val df = t.select(cols:_*).toDF(colsTrimmed:_*)
  21.  
  22. scala> df.show
  23. +----+---------+--------+
  24. |col1|     col2|    col3|
  25. +----+---------+--------+
  26. | 1.0|pi number|3.141592|
  27. | 2.0| e number| 2.71828|
  28. +----+---------+--------+
  29.  
  30. scala> df.printSchema
  31. root
  32. |-- col1: double (nullable = true)
  33. |-- col2: string (nullable = true)
  34. |-- col3: double (nullable = true)
  35.  
  36. // Variant of the read above that also sets the reader's ignoreLeadingWhiteSpace / ignoreTrailingWhiteSpace options.
  37. val t = spark.read
  38. .option("header","true")
  39. .option("inferSchema","true")
  40. .option("delimiter","|")
  41. .option("parserLib","UNIVOCITY")
  42. .option("ignoreLeadingWhiteSpace",true)
  43. .option("ignoreTrailingWhiteSpace",true)
  44. .option("comment","+")
  45. .csv("file:///temp/spark.out")
  46. // keep only the columns whose names do not start with "_c" (presumably reader placeholders for the empty edge cells - confirm)
  47. val cols = t.columns.filter(!_.startsWith("_c")).map(t(_))
  48. // project onto the kept columns; this variant does not rename them
  49. val df = t.select(cols: _*)
Add Comment
Please, Sign In to add comment