Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- scala> val model = kmeans.fit(data)
- 2019-02-03 20:51:41 WARN BlockManager:66 - Putting block rdd_126_1 failed due to exception org.apache.spark.SparkException: Failed to execute user defined function($anonfun$4: (struct<weight:double,freight:double>) => struct<type:tinyint,size:int,indices:array<int>,values:array<double>>).
- 2019-02-03 20:51:41 WARN BlockManager:66 - Block rdd_126_1 could not be removed as it was not found on disk or in memory
- 2019-02-03 20:51:41 WARN BlockManager:66 - Putting block rdd_126_2 failed due to exception org.apache.spark.SparkException: Failed to execute user defined function($anonfun$4: (struct<weight:double,freight:double>) => struct<type:tinyint,size:int,indices:array<int>,values:array<double>>).
- 2019-02-03 20:51:41 ERROR Executor:91 - Exception in task 1.0 in stage 16.0 (TID 23)
- org.apache.spark.SparkException: Failed to execute user defined function($anonfun$4: (struct<weight:double,freight:double>) => struct<type:tinyint,size:int,indices:array<int>,values:array<double>>)
- at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
- at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
- at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$11$$anon$1.hasNext(WholeStageCodegenExec.scala:619)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
- at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:220)
- at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:298)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1165)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
- at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
- at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
- at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
- at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
- at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
- at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
- at org.apache.spark.scheduler.Task.run(Task.scala:121)
- at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
- at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
- at java.lang.Thread.run(Thread.java:748)
- Caused by: org.apache.spark.SparkException: Encountered null while assembling a row with handleInvalid = "error". Consider
- removing nulls from dataset or using handleInvalid = "keep" or "skip".
- at org.apache.spark.ml.feature.VectorAssembler$$anonfun$assemble$1.apply(VectorAssembler.scala:287)
- at org.apache.spark.ml.feature.VectorAssembler$$anonfun$assemble$1.apply(VectorAssembler.scala:255)
- at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
- at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
- at org.apache.spark.ml.feature.VectorAssembler$.assemble(VectorAssembler.scala:255)
- at org.apache.spark.ml.feature.VectorAssembler$$anonfun$4.apply(VectorAssembler.scala:144)
- at org.apache.spark.ml.feature.VectorAssembler$$anonfun$4.apply(VectorAssembler.scala:143)
- ... 29 more
- 2019-02-03 20:51:41 WARN BlockManager:66 - Putting block rdd_126_0 failed due to exception org.apache.spark.SparkException: Failed to execute user defined function($anonfun$4: (struct<weight:double,freight:double>) => struct<type:tinyint,size:int,indices:array<int>,values:array<double>>).
- 2019-02-03 20:51:41 WARN TaskSetManager:66 - Lost task 1.0 in stage 16.0 (TID 23, localhost, executor driver): org.apache.spark.SparkException: Failed to execute user defined function($anonfun$4: (struct<weight:double,freight:double>) => struct<type:tinyint,size:int,indices:array<int>,values:array<double>>)
- at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
- at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
- at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$11$$anon$1.hasNext(WholeStageCodegenExec.scala:619)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
- at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:220)
- at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:298)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1165)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
- at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
- at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
- at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
- at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
- at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
- at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
- at org.apache.spark.scheduler.Task.run(Task.scala:121)
- at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
- at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
- at java.lang.Thread.run(Thread.java:748)
- Caused by: org.apache.spark.SparkException: Encountered null while assembling a row with handleInvalid = "error". Consider
- removing nulls from dataset or using handleInvalid = "keep" or "skip".
- at org.apache.spark.ml.feature.VectorAssembler$$anonfun$assemble$1.apply(VectorAssembler.scala:287)
- at org.apache.spark.ml.feature.VectorAssembler$$anonfun$assemble$1.apply(VectorAssembler.scala:255)
- at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
- at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
- at org.apache.spark.ml.feature.VectorAssembler$.assemble(VectorAssembler.scala:255)
- at org.apache.spark.ml.feature.VectorAssembler$$anonfun$4.apply(VectorAssembler.scala:144)
- at org.apache.spark.ml.feature.VectorAssembler$$anonfun$4.apply(VectorAssembler.scala:143)
- ... 29 more
- org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- 2019-02-03 20:51:41 ERROR TaskSetManager:70 - Task 1 in stage 16.0 failed 1 times; aborting job
- 2019-02-03 20:51:41 WARN BlockManager:66 - Block rdd_126_2 could not be removed as it was not found on disk or in memory
- 2019-02-03 20:51:41 WARN BlockManager:66 - Block rdd_126_0 could not be removed as it was not found on disk or in memory
- 2019-02-03 20:51:41 ERROR Instrumentation:70 - org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1887)
- org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1875)
- org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1874)
- scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
- scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
- org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1874)
- org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
- org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
- scala.Option.foreach(Option.scala:257)
- org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926)
- org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2108)
- org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2057)
- org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2046)
- org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
- org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)
- org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
- org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
- org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
- org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)
- org.apache.spark.rdd.RDD.count(RDD.scala:1168)
- org.apache.spark.rdd.RDD$$anonfun$takeSample$1.apply(RDD.scala:572)
- org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
- org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
- org.apache.spark.rdd.RDD.takeSample(RDD.scala:561)
- org.apache.spark.mllib.clustering.KMeans.initKMeansParallel(KMeans.scala:386)
- org.apache.spark.mllib.clustering.KMeans.runAlgorithm(KMeans.scala:282)
- org.apache.spark.mllib.clustering.KMeans.run(KMeans.scala:251)
- org.apache.spark.ml.clustering.KMeans$$anonfun$fit$1.apply(KMeans.scala:362)
- org.apache.spark.ml.clustering.KMeans$$anonfun$fit$1.apply(KMeans.scala:340)
- org.apache.spark.ml.util.Instrumentation$$anonfun$11.apply(Instrumentation.scala:183)
- scala.util.Try$.apply(Try.scala:192)
- org.apache.spark.ml.util.Instrumentation$.instrumented(Instrumentation.scala:183)
- org.apache.spark.ml.clustering.KMeans.fit(KMeans.scala:340)
- $line66.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:37)
- $line66.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:42)
- $line66.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:44)
- $line66.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:46)
- $line66.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:48)
- $line66.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:50)
- $line66.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:52)
- $line66.$read$$iw$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:54)
- $line66.$read$$iw$$iw$$iw$$iw$$iw$$iw.<init>(<console>:56)
- $line66.$read$$iw$$iw$$iw$$iw$$iw.<init>(<console>:58)
- $line66.$read$$iw$$iw$$iw$$iw.<init>(<console>:60)
- $line66.$read$$iw$$iw$$iw.<init>(<console>:62)
- $line66.$read$$iw$$iw.<init>(<console>:64)
- $line66.$read$$iw.<init>(<console>:66)
- $line66.$read.<init>(<console>:68)
- $line66.$read$.<init>(<console>:72)
- $line66.$read$.<clinit>(<console>)
- $line66.$eval$.$print$lzycompute(<console>:7)
- $line66.$eval$.$print(<console>:6)
- $line66.$eval.$print(<console>)
- sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
- sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
- sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
- java.lang.reflect.Method.invoke(Method.java:498)
- scala.tools.nsc.interpreter.IMain$ReadEvalPrint.call(IMain.scala:793)
- scala.tools.nsc.interpreter.IMain$Request.loadAndRun(IMain.scala:1054)
- scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:645)
- scala.tools.nsc.interpreter.IMain$WrappedRequest$$anonfun$loadAndRunReq$1.apply(IMain.scala:644)
- scala.reflect.internal.util.ScalaClassLoader$class.asContext(ScalaClassLoader.scala:31)
- scala.reflect.internal.util.AbstractFileClassLoader.asContext(AbstractFileClassLoader.scala:19)
- scala.tools.nsc.interpreter.IMain$WrappedRequest.loadAndRunReq(IMain.scala:644)
- scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:576)
- scala.tools.nsc.interpreter.IMain.interpret(IMain.scala:572)
- scala.tools.nsc.interpreter.ILoop.interpretStartingWith(ILoop.scala:819)
- scala.tools.nsc.interpreter.ILoop.command(ILoop.scala:691)
- scala.tools.nsc.interpreter.ILoop.processLine(ILoop.scala:404)
- scala.tools.nsc.interpreter.ILoop.loop(ILoop.scala:425)
- org.apache.spark.repl.SparkILoop$$anonfun$process$1.apply$mcZ$sp(SparkILoop.scala:285)
- org.apache.spark.repl.SparkILoop.runClosure(SparkILoop.scala:159)
- org.apache.spark.repl.SparkILoop.process(SparkILoop.scala:182)
- org.apache.spark.repl.Main$.doMain(Main.scala:78)
- org.apache.spark.repl.Main$.main(Main.scala:58)
- org.apache.spark.repl.Main.main(Main.scala)
- sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
- sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
- sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
- java.lang.reflect.Method.invoke(Method.java:498)
- org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52)
- org.apache.spark.deploy.SparkSubmit.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:849)
- org.apache.spark.deploy.SparkSubmit.doRunMain$1(SparkSubmit.scala:167)
- org.apache.spark.deploy.SparkSubmit.submit(SparkSubmit.scala:195)
- org.apache.spark.deploy.SparkSubmit.doSubmit(SparkSubmit.scala:86)
- org.apache.spark.deploy.SparkSubmit$$anon$2.doSubmit(SparkSubmit.scala:924)
- org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:933)
- org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
- 2019-02-03 20:51:41 WARN TaskSetManager:66 - Lost task 0.0 in stage 16.0 (TID 22, localhost, executor driver): TaskKilled (Stage cancelled)
- 2019-02-03 20:51:41 WARN TaskSetManager:66 - Lost task 2.0 in stage 16.0 (TID 24, localhost, executor driver): TaskKilled (Stage cancelled)
- org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 16.0 failed 1 times, most recent failure: Lost task 1.0 in stage 16.0 (TID 23, localhost, executor driver): org.apache.spark.SparkException: Failed to execute user defined function($anonfun$4: (struct<weight:double,freight:double>) => struct<type:tinyint,size:int,indices:array<int>,values:array<double>>)
- at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
- at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
- at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$11$$anon$1.hasNext(WholeStageCodegenExec.scala:619)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
- at org.apache.spark.storage.memory.MemoryStore.putIterator(MemoryStore.scala:220)
- at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:298)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1165)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1156)
- at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1091)
- at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1156)
- at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:882)
- at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
- at org.apache.spark.rdd.ZippedPartitionsRDD2.compute(ZippedPartitionsRDD.scala:89)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
- at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
- at org.apache.spark.scheduler.Task.run(Task.scala:121)
- at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
- at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
- at java.lang.Thread.run(Thread.java:748)
- Caused by: org.apache.spark.SparkException: Encountered null while assembling a row with handleInvalid = "error". Consider
- removing nulls from dataset or using handleInvalid = "keep" or "skip".
- at org.apache.spark.ml.feature.VectorAssembler$$anonfun$assemble$1.apply(VectorAssembler.scala:287)
- at org.apache.spark.ml.feature.VectorAssembler$$anonfun$assemble$1.apply(VectorAssembler.scala:255)
- at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
- at scala.collection.mutable.WrappedArray.foreach(WrappedArray.scala:35)
- at org.apache.spark.ml.feature.VectorAssembler$.assemble(VectorAssembler.scala:255)
- at org.apache.spark.ml.feature.VectorAssembler$$anonfun$4.apply(VectorAssembler.scala:144)
- at org.apache.spark.ml.feature.VectorAssembler$$anonfun$4.apply(VectorAssembler.scala:143)
- ... 29 more
- at org.apache.spark.rdd.RDD.takeSample(RDD.scala:561)
- Driver stacktrace:
- at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1887)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1875)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1874)
- at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
- at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
- at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1874)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
- at scala.Option.foreach(Option.scala:257)
- at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926)
- at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2108)
- at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2057)
- at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2046)
- at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
- at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)
- at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
- at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
- at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
- at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)
- at org.apache.spark.rdd.RDD.count(RDD.scala:1168)
- at org.apache.spark.rdd.RDD$$anonfun$takeSample$1.apply(RDD.scala:572)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
- at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
- at org.apache.spark.rdd.RDD.takeSample(RDD.scala:561)
- at org.apache.spark.mllib.clustering.KMeans.initKMeansParallel(KMeans.scala:386)
- at org.apache.spark.mllib.clustering.KMeans.runAlgorithm(KMeans.scala:282)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement