Advertisement
Guest User

Untitled

a guest
Feb 3rd, 2019
508
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 25.25 KB | None | 0 0
  1. scala> val model = kmeans.fit(data)
  2. 2019-02-03 20:51:41 WARN BlockManager:66 - Putting block rdd_126_1 failed due to exception org.apache.spark.SparkException: Failed to execute user defined function($anonfun$4: (struct<weight:double,freight:double>) => struct<type:tinyint,size:int,indices:array<int>,values:array<double>>).
  3. 2019-02-03 20:51:41 WARN BlockManager:66 - Block rdd_126_1 could not be removed as it was not found on disk or in memory
  4. 2019-02-03 20:51:41 WARN BlockManager:66 - Putting block rdd_126_2 failed due to exception org.apache.spark.SparkException: Failed to execute user defined function($anonfun$4: (struct<weight:double,freight:double>) => struct<type:tinyint,size:int,indices:array<int>,values:array<double>>).
  5. 2019-02-03 20:51:41 ERROR Executor:91 - Exception in task 1.0 in stage 16.0 (TID 23)
  6. org.apache.spark.SparkException: Failed to execute user defined function($anonfun$4: (struct<weight:double,freight:double>) => struct<type:tinyint,size:int,indices:array<int>,values:array<double>>)
  7. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
  8. at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  9. at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$11$$anon$1.hasNext(WholeStageCodegenExec.scala:619)
  10. at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  11. at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  12. at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  13. at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:220)
  14. at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:298)
  15. at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1165)
  16. at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
  17. at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
  18. at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
  19. at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
  20. at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
  21. at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
  22. at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
  23. at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
  24. at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
  25. at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
  26. at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
  27. at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
  28. at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
  29. at org.apache.spark.scheduler.Task.run(Task.scala:121)
  30. at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
  31. at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
  32. at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
  33. at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  34. at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  35. at java.lang.Thread.run(Thread.java:748)
  36. Caused by: org.apache.spark.SparkException: Encountered null while assembling a row with handleInvalid = "error". Consider
  37. removing nulls from dataset or using handleInvalid = "keep" or "skip".
  38. at org.apache.spark.ml.feature.VectorAssembler$$anonfun$assemble$1.apply(VectorAssembler.scala:287)
  39. at org.apache.spark.ml.feature.VectorAssembler$$anonfun$assemble$1.apply(VectorAssembler.scala:255)
  40. at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
  41. at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
  42. at org.apache.spark.ml.feature.VectorAssembler$.assemble(VectorAssembler.scala:255)
  43. at org.apache.spark.ml.feature.VectorAssembler$$anonfun$4.apply(VectorAssembler.scala:144)
  44. at org.apache.spark.ml.feature.VectorAssembler$$anonfun$4.apply(VectorAssembler.scala:143)
  45. ... 29 more
  46. 2019-02-03 20:51:41 WARN BlockManager:66 - Putting block rdd_126_0 failed due to exception org.apache.spark.SparkException: Failed to execute user defined function($anonfun$4: (struct<weight:double,freight:double>) => struct<type:tinyint,size:int,indices:array<int>,values:array<double>>).
  47. 2019-02-03 20:51:41 WARN TaskSetManager:66 - Lost task 1.0 in stage 16.0 (TID 23, localhost, executor driver): org.apache.spark.SparkException: Failed to execute user defined function($anonfun$4: (struct<weight:double,freight:double>) => struct<type:tinyint,size:int,indices:array<int>,values:array<double>>)
  48. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
  49. at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  50. at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$11$$anon$1.hasNext(WholeStageCodegenExec.scala:619)
  51. at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  52. at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  53. at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  54. at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:220)
  55. at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:298)
  56. at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1165)
  57. at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
  58. at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
  59. at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
  60. at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
  61. at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
  62. at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
  63. at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
  64. at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
  65. at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
  66. at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
  67. at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
  68. at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
  69. at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
  70. at org.apache.spark.scheduler.Task.run(Task.scala:121)
  71. at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
  72. at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
  73. at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
  74. at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  75. at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  76. at java.lang.Thread.run(Thread.java:748)
  77. Caused by: org.apache.spark.SparkException: Encountered null while assembling a row with handleInvalid = "error". Consider
  78. removing nulls from dataset or using handleInvalid = "keep" or "skip".
  79. at org.apache.spark.ml.feature.VectorAssembler$$anonfun$assemble$1.apply(VectorAssembler.scala:287)
  80. at org.apache.spark.ml.feature.VectorAssembler$$anonfun$assemble$1.apply(VectorAssembler.scala:255)
  81. at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
  82. at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
  83. at org.apache.spark.ml.feature.VectorAssembler$.assemble(VectorAssembler.scala:255)
  84. at org.apache.spark.ml.feature.VectorAssembler$$anonfun$4.apply(VectorAssembler.scala:144)
  85. at org.apache.spark.ml.feature.VectorAssembler$$anonfun$4.apply(VectorAssembler.scala:143)
  86. ... 29 more
  87. org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  88. 2019-02-03 20:51:41 ERROR TaskSetManager:70 - Task 1 in stage 16.0 failed 1 times; aborting job
  89. 2019-02-03 20:51:41 WARN BlockManager:66 - Block rdd_126_2 could not be removed as it was not found on disk or in memory
  90. 2019-02-03 20:51:41 WARN BlockManager:66 - Block rdd_126_0 could not be removed as it was not found on disk or in memory
  91. 2019-02-03 20:51:41 ERROR Instrumentation:70 - org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1887)
  92. org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1875)
  93. org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1874)
  94. scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
  95. scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
  96. org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1874)
  97. org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
  98. org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
  99. scala.Option.foreach(Option.scala:257)
  100. org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926)
  101. org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2108)
  102. org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2057)
  103. org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2046)
  104. org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
  105. org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)
  106. org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
  107. org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
  108. org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
  109. org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)
  110. org.apache.spark.rdd.RDD.count(RDD.scala:1168)
  111. org.apache.spark.rdd.RDD$$anonfun$takeSample$1.apply(RDD.scala:572)
  112. org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  113. org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
  114. org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
  115. org.apache.spark.rdd.RDD.takeSample(RDD.scala:561)
  116. org.apache.spark.mllib.clustering.KMeans.initKMeansParallel(KMeans.scala:386)
  117. org.apache.spark.mllib.clustering.KMeans.runAlgorithm(KMeans.scala:282)
  118. org.apache.spark.mllib.clustering.KMeans.run(KMeans.scala:251)
  119. org.apache.spark.ml.clustering.KMeans$$anonfun$fit$1.apply(KMeans.scala:362)
  120. org.apache.spark.ml.clustering.KMeans$$anonfun$fit$1.apply(KMeans.scala:340)
  121. org.apache.spark.ml.util.Instrumentation$$anonfun$11.apply(Instrumentation.scala:183)
  122. scala.util.Try$.apply(Try.scala:192)
  123. org.apache.spark.ml.util.Instrumentation$.instrumented(Instrumentation.scala:183)
  124. org.apache.spark.ml.clustering.KMeans.fit(KMeans.scala:340)
  125. $line66.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:37)
  126. $line66.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:42)
  127. $line66.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:44)
  128. $line66.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:46)
  129. $line66.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:48)
  130. $line66.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:50)
  131. $line66.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:52)
  132. $line66.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:54)
  133. $line66.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:56)
  134. $line66.$read$$iw$$iw$$iw$$iw$$iw.<init>(<console>:58)
  135. $line66.$read$$iw$$iw$$iw$$iw.<init>(<console>:60)
  136. $line66.$read$$iw$$iw$$iw.<init>(<console>:62)
  137. $line66.$read$$iw$$iw.<init>(<console>:64)
  138. $line66.$read$$iw.<init>(<console>:66)
  139. $line66.$read.<init>(<console>:68)
  140. $line66.$read$.<init>(<console>:72)
  141. $line66.$read$.<clinit>(<console>)
  142. $line66.$eval$.$print$lzycompute(<console>:7)
  143. $line66.$eval$.$print(<console>:6)
  144. $line66.$eval.$print(<console>)
  145. sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  146. sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
  147. sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  148. java.lang.reflect.Method.invoke(Method.java:498)
  149. scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:793)
  150. scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1054)
  151. scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:645)
  152. scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:644)
  153. scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
  154. scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
  155. scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:644)
  156. scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:576)
       scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:572)
  157. scala.tools.nsc.interpreter.ILoop.interpretStartingWith(ILoop.scala:819)
  158. scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:691)
  159. scala.tools.nsc.interpreter.ILoop.processLine(ILoop.scala:404)
  160. scala.tools.nsc.interpreter.ILoop.loop(ILoop.scala:425)
  161. org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:285)
  162. org.apache.spark.repl.SparkILoop.runClosure(SparkILoop.scala:159)
  163. org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:182)
  164. org.apache.spark.repl.Main$.doMain(Main.scala:78)
  165. org.apache.spark.repl.Main$.main(Main.scala:58)
  166. org.apache.spark.repl.Main.main(Main.scala)
  167. sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  168. sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
  169. sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  170. java.lang.reflect.Method.invoke(Method.java:498)
  171. org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
  172. org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:849)
  173. org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:167)
  174. org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:195)
  175. org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
  176. org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:924)
  177. org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:933)
  178. org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
  179. 2019-02-03 20:51:41 WARN TaskSetManager:66 - Lost task 0.0 in stage 16.0 (TID 22, localhost, executor driver): TaskKilled (Stage cancelled)
  180. 2019-02-03 20:51:41 WARN TaskSetManager:66 - Lost task 2.0 in stage 16.0 (TID 24, localhost, executor driver): TaskKilled (Stage cancelled)
  181. org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 16.0 failed 1 times, most recent failure: Lost task 1.0 in stage 16.0 (TID 23, localhost, executor driver): org.apache.spark.SparkException: Failed to execute user defined function($anonfun$4: (struct<weight:double,freight:double>) => struct<type:tinyint,size:int,indices:array<int>,values:array<double>>)
  182. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
  183. at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  184. at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$11$$anon$1.hasNext(WholeStageCodegenExec.scala:619)
  185. at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  186. at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  187. at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  188. at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:220)
  189. at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:298)
  190. at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1165)
  191. at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
  192. at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
  193. at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
  194. at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
  195. at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
  196. at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
  197. at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
  198. at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
  199. at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
  200. at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
  201. at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
       at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
  202. at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
  203. at org.apache.spark.scheduler.Task.run(Task.scala:121)
  204. at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
  205. at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
  206. at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
  207. at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  208. at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  209. at java.lang.Thread.run(Thread.java:748)
  210. Caused by: org.apache.spark.SparkException: Encountered null while assembling a row with handleInvalid = "error". Consider
  211. removing nulls from dataset or using handleInvalid = "keep" or "skip".
  212. at org.apache.spark.ml.feature.VectorAssembler$$anonfun$assemble$1.apply(VectorAssembler.scala:287)
  213. at org.apache.spark.ml.feature.VectorAssembler$$anonfun$assemble$1.apply(VectorAssembler.scala:255)
  214. at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
  215. at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
  216. at org.apache.spark.ml.feature.VectorAssembler$.assemble(VectorAssembler.scala:255)
  217. at org.apache.spark.ml.feature.VectorAssembler$$anonfun$4.apply(VectorAssembler.scala:144)
  218. at org.apache.spark.ml.feature.VectorAssembler$$anonfun$4.apply(VectorAssembler.scala:143)
  219. ... 29 more
  220. at org.apache.spark.rdd.RDD.takeSample(RDD.scala:561)
  221. Driver stacktrace:
  222. at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1887)
  223. at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1875)
  224. at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1874)
  225. at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
  226. at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
  227. at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1874)
  228. at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
  229. at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
  230. at scala.Option.foreach(Option.scala:257)
  231. at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926)
  232. at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2108)
  233. at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2057)
  234. at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2046)
  235. at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
  236. at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)
  237. at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
  238. at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
  239. at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
  240. at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)
  241. at org.apache.spark.rdd.RDD.count(RDD.scala:1168)
  242. at org.apache.spark.rdd.RDD$$anonfun$takeSample$1.apply(RDD.scala:572)
  243. at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  244. at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
  245. at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
  246. at org.apache.spark.rdd.RDD.takeSample(RDD.scala:561)
  247. at org.apache.spark.mllib.clustering.KMeans.initKMeansParallel(KMeans.scala:386)
  248. at org.apache.spark.mllib.clustering.KMeans.runAlgorithm(KMeans.scala:282)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement