Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- +----+---------+--------+
- |col1| col2| col3|
- +----+---------+--------+
- | 1|pi number|3.141592|
- | 2| e number| 2.71828|
- +----+---------+--------+
- // Read a table printed by DataFrame.show (fixed-width text) back into a DataFrame.
- // '|' is used as the field delimiter, and the "+----+" border lines are skipped
- // because they start with the configured comment character '+'.
- val t = spark.read
-   .option("header","true")
-   .option("inferSchema","true")
-   .option("delimiter","|")
-   .option("comment","+")
-   .csv("file:///temp/spark.out")
- // Each row starts and ends with '|', which yields empty edge columns; these show up
- // with auto-generated names starting with "_c", so drop them by that prefix.
- val keptNames = t.columns.filterNot(_.startsWith("_c"))
- // column references for the columns that carry data
- val cols = keptNames.map(name => t(name))
- // remove the padding whitespace from the header names
- // (the backslash must be doubled: "\s" is not a valid escape in a Scala string literal)
- val colsTrimmed = keptNames.map(_.replaceAll("\\s+",""))
- // rename columns using 'colsTrimmed'
- val df = t.select(cols:_*).toDF(colsTrimmed:_*)
- scala> df.show
- +----+---------+--------+
- |col1| col2| col3|
- +----+---------+--------+
- | 1.0|pi number|3.141592|
- | 2.0| e number| 2.71828|
- +----+---------+--------+
- scala> df.printSchema
- root
- |-- col1: double (nullable = true)
- |-- col2: string (nullable = true)
- |-- col3: double (nullable = true)
- // Read a table printed by DataFrame.show (fixed-width text) back into a DataFrame,
- // this time asking the CSV reader itself to ignore the whitespace padding around values.
- // '|' is the field delimiter; "+----+" border lines are skipped as comment lines.
- val t = spark.read
-   .options(Map(
-     "header" -> "true",
-     "inferSchema" -> "true",
-     "delimiter" -> "|",
-     "parserLib" -> "UNIVOCITY",
-     "ignoreLeadingWhiteSpace" -> "true",
-     "ignoreTrailingWhiteSpace" -> "true",
-     "comment" -> "+"
-   ))
-   .csv("file:///temp/spark.out")
- // The leading and trailing '|' of each row produce empty edge columns whose
- // auto-generated names start with "_c"; keep only the columns that carry data.
- val dataColumnNames = t.columns.filterNot(_.startsWith("_c"))
- val cols = dataColumnNames.map(t(_))
- // No rename step here: presumably the whitespace options already give clean
- // header names (verify with df.printSchema).
- val df = t.select(cols:_*)
Add Comment
Please, Sign In to add comment