Guest User

Untitled

a guest
Jan 17th, 2018
328
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 24.25 KB | None | 0 0
  1. +-------------------+--------------------+----------+
  2. | CreatedAt| tweet|prediction|
  3. +-------------------+--------------------+----------+
  4. |2015-10-12 20:58:52|This Saturday is ...| 1.0|
  5. |2015-10-13 17:28:45|Sleep with a spoo...| 1.0|
  6.  
  7. pres2.write.format("com.databricks.spark.csv").save("someFile.csv")
  8. pres2.write.mode("overwrite").format("com.databricks.spark.csv").save("someFile.csv")
  9. pres2.write.mode("append").format("com.databricks.spark.csv").save("someFile.csv")
  10.  
  11. 17/03/10 16:24:02 WARN TaskSetManager: Lost task 0.0 in stage 23.0 (TID 149, ip-addy.ec2.internal, executor 9): org.apache.spark.SparkException: Task failed while writing rows
  12. at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:204)
  13. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$3.apply(FileFormatWriter.scala:129)
  14. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$3.apply(FileFormatWriter.scala:128)
  15. at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
  16. at org.apache.spark.scheduler.Task.run(Task.scala:99)
  17. at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
  18. at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
  19. at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
  20. at java.lang.Thread.run(Thread.java:745)
  21. Caused by: org.apache.spark.SparkException: Failed to execute user defined function($anonfun$createTransformFunc$2: (string) => array<string>)
  22. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
  23. at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  24. at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:377)
  25. at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:243)
  26. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:190)
  27. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:188)
  28. at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1341)
  29. at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:193)
  30. ... 8 more
  31. Caused by: java.lang.NullPointerException
  32. at org.apache.spark.ml.feature.RegexTokenizer$$anonfun$createTransformFunc$2.apply(Tokenizer.scala:142)
  33. at org.apache.spark.ml.feature.RegexTokenizer$$anonfun$createTransformFunc$2.apply(Tokenizer.scala:140)
  34. ... 16 more
  35.  
  36. 17/03/10 16:24:03 ERROR TaskSetManager: Task 0 in stage 23.0 failed 4 times; aborting job
  37. 17/03/10 16:24:03 ERROR FileFormatWriter: Aborting job null.
  38. org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 23.0 failed 4 times, most recent failure: Lost task 0.3 in stage 23.0 (TID 152, ip-addy, executor 9): org.apache.spark.SparkException: Task failed while writing rows
  39. at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:204)
  40. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$3.apply(FileFormatWriter.scala:129)
  41. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$3.apply(FileFormatWriter.scala:128)
  42. at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
  43. at org.apache.spark.scheduler.Task.run(Task.scala:99)
  44. at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
  45. at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
  46. at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
  47. at java.lang.Thread.run(Thread.java:745)
  48. Caused by: org.apache.spark.SparkException: Failed to execute user defined function($anonfun$createTransformFunc$2: (string) => array<string>)
  49. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
  50. at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  51. at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:377)
  52. at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:243)
  53. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:190)
  54. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:188)
  55. at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1341)
  56. at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:193)
  57. ... 8 more
  58. Caused by: java.lang.NullPointerException
  59. at org.apache.spark.ml.feature.RegexTokenizer$$anonfun$createTransformFunc$2.apply(Tokenizer.scala:142)
  60. at org.apache.spark.ml.feature.RegexTokenizer$$anonfun$createTransformFunc$2.apply(Tokenizer.scala:140)
  61. ... 16 more
  62.  
  63. Driver stacktrace:
  64. at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1435)
  65. at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1423)
  66. at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1422)
  67. at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
  68. at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
  69. at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1422)
  70. at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
  71. at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
  72. at scala.Option.foreach(Option.scala:257)
  73. at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:802)
  74. at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650)
  75. at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605)
  76. at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594)
  77. at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
  78. at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628)
  79. at org.apache.spark.SparkContext.runJob(SparkContext.scala:1918)
  80. at org.apache.spark.SparkContext.runJob(SparkContext.scala:1931)
  81. at org.apache.spark.SparkContext.runJob(SparkContext.scala:1951)
  82. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:127)
  83. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:121)
  84. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:121)
  85. at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)
  86. at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:121)
  87. at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:101)
  88. at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
  89. at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
  90. at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
  91. at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  92. at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  93. at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
  94. at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  95. at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
  96. at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
  97. at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:87)
  98. at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:87)
  99. at org.apache.spark.sql.execution.datasources.DataSource.write(DataSource.scala:492)
  100. at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:215)
  101. at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:198)
  102. at $line94.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:80)
  103. at $line94.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:85)
  104. at $line94.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:87)
  105. at $line94.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:89)
  106. at $line94.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:91)
  107. at $line94.$read$$iw$$iw$$iw$$iw$$iw.<init>(<console>:93)
  108. at $line94.$read$$iw$$iw$$iw$$iw.<init>(<console>:95)
  109. at $line94.$read$$iw$$iw$$iw.<init>(<console>:97)
  110. at $line94.$read$$iw$$iw.<init>(<console>:99)
  111. at $line94.$read$$iw.<init>(<console>:101)
  112. at $line94.$read.<init>(<console>:103)
  113. at $line94.$read$.<init>(<console>:107)
  114. at $line94.$read$.<clinit>(<console>)
  115. at $line94.$eval$.$print$lzycompute(<console>:7)
  116. at $line94.$eval$.$print(<console>:6)
  117. at $line94.$eval.$print(<console>)
  118. at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  119. at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
  120. at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  121. at java.lang.reflect.Method.invoke(Method.java:498)
  122. at scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:786)
  123. at scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1047)
  124. at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:638)
  125. at scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:637)
  126. at scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
  127. at scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
  128. at scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:637)
  129. at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:569)
  130. at scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:565)
  131. at scala.tools.nsc.interpreter.ILoop.interpretStartingWith(ILoop.scala:807)
  132. at scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:681)
  133. at scala.tools.nsc.interpreter.ILoop.processLine(ILoop.scala:395)
  134. at scala.tools.nsc.interpreter.ILoop.loop(ILoop.scala:415)
  135. at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply$mcZ$sp(ILoop.scala:923)
  136. at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909)
  137. at scala.tools.nsc.interpreter.ILoop$$anonfun$process$1.apply(ILoop.scala:909)
  138. at scala.reflect.internal.util.ScalaClassLoader$.savingContextLoader(ScalaClassLoader.scala:97)
  139. at scala.tools.nsc.interpreter.ILoop.process(ILoop.scala:909)
  140. at org.apache.spark.repl.Main$.doMain(Main.scala:68)
  141. at org.apache.spark.repl.Main$.main(Main.scala:51)
  142. at org.apache.spark.repl.Main.main(Main.scala)
  143. at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  144. at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
  145. at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  146. at java.lang.reflect.Method.invoke(Method.java:498)
  147. at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:738)
  148. at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:187)
  149. at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:212)
  150. at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:126)
  151. at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
  152. Caused by: org.apache.spark.SparkException: Task failed while writing rows
  153. at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:204)
  154. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$3.apply(FileFormatWriter.scala:129)
  155. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$3.apply(FileFormatWriter.scala:128)
  156. at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
  157. at org.apache.spark.scheduler.Task.run(Task.scala:99)
  158. at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
  159. at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
  160. at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
  161. at java.lang.Thread.run(Thread.java:745)
  162. Caused by: org.apache.spark.SparkException: Failed to execute user defined function($anonfun$createTransformFunc$2: (string) => array<string>)
  163. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
  164. at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  165. at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:377)
  166. at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:243)
  167. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:190)
  168. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:188)
  169. at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1341)
  170. at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:193)
  171. ... 8 more
  172. Caused by: java.lang.NullPointerException
  173. at org.apache.spark.ml.feature.RegexTokenizer$$anonfun$createTransformFunc$2.apply(Tokenizer.scala:142)
  174. at org.apache.spark.ml.feature.RegexTokenizer$$anonfun$createTransformFunc$2.apply(Tokenizer.scala:140)
  175. ... 16 more
  176. org.apache.spark.SparkException: Job aborted.
  177. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:147)
  178. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:121)
  179. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:121)
  180. at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)
  181. at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:121)
  182. at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:101)
  183. at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
  184. at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
  185. at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
  186. at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  187. at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
  188. at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
  189. at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  190. at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
  191. at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
  192. at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:87)
  193. at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:87)
  194. at org.apache.spark.sql.execution.datasources.DataSource.write(DataSource.scala:492)
  195. at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:215)
  196. at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:198)
  197. ... 50 elided
  198. Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 23.0 failed 4 times, most recent failure: Lost task 0.3 in stage 23.0 (TID 152, ip-addy, executor 9): org.apache.spark.SparkException: Task failed while writing rows
  199. at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:204)
  200. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$3.apply(FileFormatWriter.scala:129)
  201. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$3.apply(FileFormatWriter.scala:128)
  202. at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
  203. at org.apache.spark.scheduler.Task.run(Task.scala:99)
  204. at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
  205. at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
  206. at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
  207. at java.lang.Thread.run(Thread.java:745)
  208. Caused by: org.apache.spark.SparkException: Failed to execute user defined function($anonfun$createTransformFunc$2: (string) => array<string>)
  209. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
  210. at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  211. at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:377)
  212. at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:243)
  213. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:190)
  214. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:188)
  215. at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1341)
  216. at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:193)
  217. ... 8 more
  218. Caused by: java.lang.NullPointerException
  219. at org.apache.spark.ml.feature.RegexTokenizer$$anonfun$createTransformFunc$2.apply(Tokenizer.scala:142)
  220. at org.apache.spark.ml.feature.RegexTokenizer$$anonfun$createTransformFunc$2.apply(Tokenizer.scala:140)
  221. ... 16 more
  222.  
  223. Driver stacktrace:
  224. at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1435)
  225. at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1423)
  226. at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1422)
  227. at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
  228. at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
  229. at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1422)
  230. at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
  231. at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
  232. at scala.Option.foreach(Option.scala:257)
  233. at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:802)
  234. at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650)
  235. at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605)
  236. at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594)
  237. at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
  238. at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628)
  239. at org.apache.spark.SparkContext.runJob(SparkContext.scala:1918)
  240. at org.apache.spark.SparkContext.runJob(SparkContext.scala:1931)
  241. at org.apache.spark.SparkContext.runJob(SparkContext.scala:1951)
  242. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:127)
  243. ... 69 more
  244. Caused by: org.apache.spark.SparkException: Task failed while writing rows
  245. at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:204)
  246. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$3.apply(FileFormatWriter.scala:129)
  247. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1$$anonfun$3.apply(FileFormatWriter.scala:128)
  248. at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
  249. at org.apache.spark.scheduler.Task.run(Task.scala:99)
  250. at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
  251. at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
  252. at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
  253. at java.lang.Thread.run(Thread.java:745)
  254. Caused by: org.apache.spark.SparkException: Failed to execute user defined function($anonfun$createTransformFunc$2: (string) => array<string>)
  255. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
  256. at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  257. at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:377)
  258. at org.apache.spark.sql.execution.datasources.FileFormatWriter$SingleDirectoryWriteTask.execute(FileFormatWriter.scala:243)
  259. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:190)
  260. at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask$3.apply(FileFormatWriter.scala:188)
  261. at org.apache.spark.util.Utils$.tryWithSafeFinallyAndFailureCallbacks(Utils.scala:1341)
  262. at org.apache.spark.sql.execution.datasources.FileFormatWriter$.org$apache$spark$sql$execution$datasources$FileFormatWriter$$executeTask(FileFormatWriter.scala:193)
  263. ... 8 more
  264. Caused by: java.lang.NullPointerException
  265. at org.apache.spark.ml.feature.RegexTokenizer$$anonfun$createTransformFunc$2.apply(Tokenizer.scala:142)
  266. at org.apache.spark.ml.feature.RegexTokenizer$$anonfun$createTransformFunc$2.apply(Tokenizer.scala:140)
  267. ... 16 more
  268.  
  269. df.printSchema()
Add Comment
Please, Sign In to add comment