Advertisement
Guest User

Untitled

a guest
Mar 23rd, 2017
60
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.49 KB | None | 0 0
  1. scala> import scala.xml._
  2. scala> val strxml = <employees>
  3. | <employee><id>1</id><name>chris</name></employee>
  4. | <employee><id>2</id><name>adam</name></employee>
  5. | <employee><id>3</id><name>karl</name></employee>
  6. | </employees>
  7. strxml: scala.xml.Elem =
  8. <employees>
  9. <employee><id>1</id><name>chris</name></employee>
  10. <employee><id>2</id><name>adam</name></employee>
  11. <employee><id>3</id><name>karl</name></employee>
  12. </employees>
  13.  
  14. scala> val t = strxml.flatMap(line => line \ "employee")
  15. t: scala.xml.NodeSeq = NodeSeq(<employee><id>1</id><name>chris</name></employee>, <employee><id>2</id><name>adam</name></employee>, <employee><id>3</id><name>karl</name></employee>)
  16.  
  17. scala> t.map(l => (l \ "id").text + "@" + (l \ "name").text).foreach(println)
  18. 1@chris
  19. 2@adam
  20. 3@karl
  21.  
  22. scala> val filexml = sc.wholeTextFiles("file:///home/cloudera/test*")
  23. filexml: org.apache.spark.rdd.RDD[(String, String)] = file:///home/cloudera/test* MapPartitionsRDD[66] at wholeTextFiles at <console>:30
  24.  
  25. scala> val lines = filexml.map(line => XML.loadString(line._2))
  26. lines: org.apache.spark.rdd.RDD[scala.xml.Elem] = MapPartitionsRDD[89] at map at <console>:32
  27.  
  28. scala> val ft = lines.map(l => l \ "employee")
  29. ft: org.apache.spark.rdd.RDD[scala.xml.NodeSeq] = MapPartitionsRDD[99] at map at <console>:34
  30.  
  31. scala> ft.map(l => (l \ "id").text + "@" + (l \ "name").text).foreach(println)
  32.  
  33. Exception in task 0.0 in stage 63.0 (TID 63)
  34. org.xml.sax.SAXParseException; lineNumber: 1; columnNumber: 1; Content is not allowed in prolog
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement