- spark-submit --packages com.databricks:spark-csv_2.11:1.5.0 ./adiel.py
- 17/01/20 09:29:30 INFO SparkContext: Running Spark version 2.0.2
- 17/01/20 09:29:31 INFO SecurityManager: Changing view acls to: hadoop
- 17/01/20 09:29:31 INFO SecurityManager: Changing modify acls to: hadoop
- 17/01/20 09:29:31 INFO SecurityManager: Changing view acls groups to:
- 17/01/20 09:29:31 INFO SecurityManager: Changing modify acls groups to:
- 17/01/20 09:29:31 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(hadoop); groups with view permissions: Set(); users with modify permissions: Set(hadoop); groups with modify permissions: Set()
- 17/01/20 09:29:31 INFO Utils: Successfully started service 'sparkDriver' on port 35719.
- 17/01/20 09:29:31 INFO SparkEnv: Registering MapOutputTracker
- 17/01/20 09:29:31 INFO SparkEnv: Registering BlockManagerMaster
- 17/01/20 09:29:31 INFO DiskBlockManager: Created local directory at /mnt/tmp/blockmgr-0badf595-8f56-45ef-bdf5-f80573d2e188
- 17/01/20 09:29:31 INFO MemoryStore: MemoryStore started with capacity 414.4 MB
- 17/01/20 09:29:31 INFO SparkEnv: Registering OutputCommitCoordinator
- 17/01/20 09:29:32 WARN Utils: Service 'SparkUI' could not bind on port 4040. Attempting port 4041.
- 17/01/20 09:29:32 INFO Utils: Successfully started service 'SparkUI' on port 4041.
- 17/01/20 09:29:32 INFO SparkUI: Bound SparkUI to 0.0.0.0, and started at http://172.31.35.244:4041
- 17/01/20 09:29:32 INFO Executor: Starting executor ID driver on host localhost
- 17/01/20 09:29:32 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 35372.
- 17/01/20 09:29:32 INFO NettyBlockTransferService: Server created on 172.31.35.244:35372
- 17/01/20 09:29:32 INFO BlockManager: external shuffle service port = 7337
- 17/01/20 09:29:32 INFO BlockManagerMaster: Registering BlockManager BlockManagerId(driver, 172.31.35.244, 35372)
- 17/01/20 09:29:32 INFO BlockManagerMasterEndpoint: Registering block manager 172.31.35.244:35372 with 414.4 MB RAM, BlockManagerId(driver, 172.31.35.244, 35372)
- 17/01/20 09:29:32 INFO BlockManagerMaster: Registered BlockManager BlockManagerId(driver, 172.31.35.244, 35372)
- ** Script Started: 2017-01-20 09:29:34.115886 **
- Loading file... Done!
- Adjusting data to fit our needs... done!
- ** DOWNSTREAM_SIZE Statistical Measures **
- 17/01/20 09:36:58 ERROR Executor: Exception in task 2.0 in stage 3.0 (TID 160)
- java.lang.NullPointerException
- at java.text.DecimalFormat.parse(DecimalFormat.java:1997)
- at java.text.NumberFormat.parse(NumberFormat.java:383)
- at org.apache.spark.sql.execution.datasources.csv.CSVTypeCast$$anonfun$castTo$4.apply$mcD$sp(CSVInferSchema.scala:259)
- at org.apache.spark.sql.execution.datasources.csv.CSVTypeCast$$anonfun$castTo$4.apply(CSVInferSchema.scala:259)
- at org.apache.spark.sql.execution.datasources.csv.CSVTypeCast$$anonfun$castTo$4.apply(CSVInferSchema.scala:259)
- at scala.util.Try.getOrElse(Try.scala:79)
- at org.apache.spark.sql.execution.datasources.csv.CSVTypeCast$.castTo(CSVInferSchema.scala:259)
- at org.apache.spark.sql.execution.datasources.csv.CSVRelation$$anonfun$csvParser$3.apply(CSVRelation.scala:116)
- at org.apache.spark.sql.execution.datasources.csv.CSVRelation$$anonfun$csvParser$3.apply(CSVRelation.scala:85)
- at org.apache.spark.sql.execution.datasources.csv.CSVFileFormat$$anonfun$buildReader$1$$anonfun$apply$2.apply(CSVFileFormat.scala:128)
- at org.apache.spark.sql.execution.datasources.csv.CSVFileFormat$$anonfun$buildReader$1$$anonfun$apply$2.apply(CSVFileFormat.scala:127)
- at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
- at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:91)
- at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
- at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
- at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:370)
- at org.apache.spark.sql.execution.columnar.InMemoryRelation$$anonfun$1$$anon$1.next(InMemoryRelation.scala:106)
- at org.apache.spark.sql.execution.columnar.InMemoryRelation$$anonfun$1$$anon$1.next(InMemoryRelation.scala:98)
- at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:214)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:935)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:926)
- at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:866)
- at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:926)
- at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:670)
- at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:330)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:281)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:79)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:47)
- at org.apache.spark.scheduler.Task.run(Task.scala:86)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 17/01/20 09:36:58 ERROR TaskSetManager: Task 2 in stage 3.0 failed 1 times; aborting job
- 17/01/20 09:36:58 ERROR Executor: Exception in task 4.0 in stage 3.0 (TID 162)
- org.apache.spark.TaskKilledException
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:264)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- Traceback (most recent call last):
- File "/home/hadoop/./adiel.py", line 59, in <module>
- withColumn("Variance", pow(col("Stddev"), 2)).show(3, False)
- File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/sql/dataframe.py", line 287, in show
- File "/usr/lib/spark/python/lib/py4j-0.10.3-src.zip/py4j/java_gateway.py", line 1133, in __call__
- File "/usr/lib/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 63, in deco
- File "/usr/lib/spark/python/lib/py4j-0.10.3-src.zip/py4j/protocol.py", line 319, in get_return_value
- py4j.protocol.Py4JJavaError: An error occurred while calling o77.showString.
- : org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 3.0 failed 1 times, most recent failure: Lost task 2.0 in stage 3.0 (TID 160, localhost): java.lang.NullPointerException
- at java.text.DecimalFormat.parse(DecimalFormat.java:1997)
- at java.text.NumberFormat.parse(NumberFormat.java:383)
- at org.apache.spark.sql.execution.datasources.csv.CSVTypeCast$$anonfun$castTo$4.apply$mcD$sp(CSVInferSchema.scala:259)
- at org.apache.spark.sql.execution.datasources.csv.CSVTypeCast$$anonfun$castTo$4.apply(CSVInferSchema.scala:259)
- at org.apache.spark.sql.execution.datasources.csv.CSVTypeCast$$anonfun$castTo$4.apply(CSVInferSchema.scala:259)
- at scala.util.Try.getOrElse(Try.scala:79)
- at org.apache.spark.sql.execution.datasources.csv.CSVTypeCast$.castTo(CSVInferSchema.scala:259)
- at org.apache.spark.sql.execution.datasources.csv.CSVRelation$$anonfun$csvParser$3.apply(CSVRelation.scala:116)
- at org.apache.spark.sql.execution.datasources.csv.CSVRelation$$anonfun$csvParser$3.apply(CSVRelation.scala:85)
- at org.apache.spark.sql.execution.datasources.csv.CSVFileFormat$$anonfun$buildReader$1$$anonfun$apply$2.apply(CSVFileFormat.scala:128)
- at org.apache.spark.sql.execution.datasources.csv.CSVFileFormat$$anonfun$buildReader$1$$anonfun$apply$2.apply(CSVFileFormat.scala:127)
- at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
- at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:91)
- at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
- at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
- at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:370)
- at org.apache.spark.sql.execution.columnar.InMemoryRelation$$anonfun$1$$anon$1.next(InMemoryRelation.scala:106)
- at org.apache.spark.sql.execution.columnar.InMemoryRelation$$anonfun$1$$anon$1.next(InMemoryRelation.scala:98)
- at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:214)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:935)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:926)
- at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:866)
- at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:926)
- at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:670)
- at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:330)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:281)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:79)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:47)
- at org.apache.spark.scheduler.Task.run(Task.scala:86)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- Driver stacktrace:
- at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1454)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1442)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1441)
- at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
- at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
- at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1441)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:811)
- at scala.Option.foreach(Option.scala:257)
- at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:811)
- at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1667)
- at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1622)
- at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1611)
- at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
- at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:632)
- at org.apache.spark.SparkContext.runJob(SparkContext.scala:1873)
- at org.apache.spark.SparkContext.runJob(SparkContext.scala:1886)
- at org.apache.spark.SparkContext.runJob(SparkContext.scala:1899)
- at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:347)
- at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:39)
- at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$execute$1$1.apply(Dataset.scala:2193)
- at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)
- at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2546)
- at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$execute$1(Dataset.scala:2192)
- at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collect(Dataset.scala:2199)
- at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:1935)
- at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:1934)
- at org.apache.spark.sql.Dataset.withTypedCallback(Dataset.scala:2576)
- at org.apache.spark.sql.Dataset.head(Dataset.scala:1934)
- at org.apache.spark.sql.Dataset.take(Dataset.scala:2149)
- at org.apache.spark.sql.Dataset.showString(Dataset.scala:239)
- at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
- at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
- at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
- at java.lang.reflect.Method.invoke(Method.java:498)
- at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:237)
- at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
- at py4j.Gateway.invoke(Gateway.java:280)
- at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
- at py4j.commands.CallCommand.execute(CallCommand.java:79)
- at py4j.GatewayConnection.run(GatewayConnection.java:214)
- at java.lang.Thread.run(Thread.java:745)
- Caused by: java.lang.NullPointerException
- at java.text.DecimalFormat.parse(DecimalFormat.java:1997)
- at java.text.NumberFormat.parse(NumberFormat.java:383)
- at org.apache.spark.sql.execution.datasources.csv.CSVTypeCast$$anonfun$castTo$4.apply$mcD$sp(CSVInferSchema.scala:259)
- at org.apache.spark.sql.execution.datasources.csv.CSVTypeCast$$anonfun$castTo$4.apply(CSVInferSchema.scala:259)
- at org.apache.spark.sql.execution.datasources.csv.CSVTypeCast$$anonfun$castTo$4.apply(CSVInferSchema.scala:259)
- at scala.util.Try.getOrElse(Try.scala:79)
- at org.apache.spark.sql.execution.datasources.csv.CSVTypeCast$.castTo(CSVInferSchema.scala:259)
- at org.apache.spark.sql.execution.datasources.csv.CSVRelation$$anonfun$csvParser$3.apply(CSVRelation.scala:116)
- at org.apache.spark.sql.execution.datasources.csv.CSVRelation$$anonfun$csvParser$3.apply(CSVRelation.scala:85)
- at org.apache.spark.sql.execution.datasources.csv.CSVFileFormat$$anonfun$buildReader$1$$anonfun$apply$2.apply(CSVFileFormat.scala:128)
- at org.apache.spark.sql.execution.datasources.csv.CSVFileFormat$$anonfun$buildReader$1$$anonfun$apply$2.apply(CSVFileFormat.scala:127)
- at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
- at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:91)
- at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIterator.processNext(Unknown Source)
- at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
- at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$8$$anon$1.hasNext(WholeStageCodegenExec.scala:370)
- at org.apache.spark.sql.execution.columnar.InMemoryRelation$$anonfun$1$$anon$1.next(InMemoryRelation.scala:106)
- at org.apache.spark.sql.execution.columnar.InMemoryRelation$$anonfun$1$$anon$1.next(InMemoryRelation.scala:98)
- at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:214)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:935)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:926)
- at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:866)
- at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:926)
- at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:670)
- at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:330)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:281)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:319)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:283)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:79)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:47)
- at org.apache.spark.scheduler.Task.run(Task.scala:86)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- ... 1 more
- 17/01/20 09:36:59 ERROR TaskContextImpl: Error in TaskCompletionListener
- java.io.IOException: Filesystem closed
- at org.apache.hadoop.hdfs.DFSClient.checkOpen(DFSClient.java:808)
- at org.apache.hadoop.hdfs.DFSInputStream.close(DFSInputStream.java:710)
- at java.io.FilterInputStream.close(FilterInputStream.java:181)
- at org.apache.hadoop.util.LineReader.close(LineReader.java:150)
- at org.apache.hadoop.mapreduce.lib.input.LineRecordReader.close(LineRecordReader.java:231)
- at org.apache.spark.sql.execution.datasources.RecordReaderIterator.close(RecordReaderIterator.scala:66)
- at org.apache.spark.sql.execution.datasources.HadoopFileLinesReader.close(HadoopFileLinesReader.scala:54)
- at org.apache.spark.sql.execution.datasources.csv.CSVFileFormat$$anonfun$buildReader$1$$anonfun$7$$anonfun$apply$1.apply(CSVFileFormat.scala:116)
- at org.apache.spark.sql.execution.datasources.csv.CSVFileFormat$$anonfun$buildReader$1$$anonfun$7$$anonfun$apply$1.apply(CSVFileFormat.scala:116)
- at org.apache.spark.TaskContext$$anon$1.onTaskCompletion(TaskContext.scala:123)
- at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:97)
- at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:95)
- at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
- at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
- at org.apache.spark.TaskContextImpl.markTaskCompleted(TaskContextImpl.scala:95)
- at org.apache.spark.scheduler.Task.run(Task.scala:99)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 17/01/20 09:36:59 ERROR TaskContextImpl: Error in TaskCompletionListener
- java.lang.IllegalStateException: Block broadcast_5 not found
- at org.apache.spark.storage.BlockInfoManager$$anonfun$1.apply(BlockInfoManager.scala:288)
- at org.apache.spark.storage.BlockInfoManager$$anonfun$1.apply(BlockInfoManager.scala:288)
- at scala.Option.getOrElse(Option.scala:121)
- at org.apache.spark.storage.BlockInfoManager.unlock(BlockInfoManager.scala:287)
- at org.apache.spark.storage.BlockManager.releaseLock(BlockManager.scala:630)
- at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$releaseLock$1.apply(TorrentBroadcast.scala:210)
- at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$releaseLock$1.apply(TorrentBroadcast.scala:210)
- at org.apache.spark.TaskContext$$anon$1.onTaskCompletion(TaskContext.scala:123)
- at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:97)
- at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:95)
- at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
- at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
- at org.apache.spark.TaskContextImpl.markTaskCompleted(TaskContextImpl.scala:95)
- at org.apache.spark.scheduler.Task.run(Task.scala:99)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 17/01/20 09:36:59 ERROR TaskContextImpl: Error in TaskCompletionListener
- java.lang.IllegalStateException: Block broadcast_6 not found
- at org.apache.spark.storage.BlockInfoManager$$anonfun$1.apply(BlockInfoManager.scala:288)
- at org.apache.spark.storage.BlockInfoManager$$anonfun$1.apply(BlockInfoManager.scala:288)
- at scala.Option.getOrElse(Option.scala:121)
- at org.apache.spark.storage.BlockInfoManager.unlock(BlockInfoManager.scala:287)
- at org.apache.spark.storage.BlockManager.releaseLock(BlockManager.scala:630)
- at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$releaseLock$1.apply(TorrentBroadcast.scala:210)
- at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$releaseLock$1.apply(TorrentBroadcast.scala:210)
- at org.apache.spark.TaskContext$$anon$1.onTaskCompletion(TaskContext.scala:123)
- at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:97)
- at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:95)
- at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
- at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
- at org.apache.spark.TaskContextImpl.markTaskCompleted(TaskContextImpl.scala:95)
- at org.apache.spark.scheduler.Task.run(Task.scala:99)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 17/01/20 09:36:59 ERROR Executor: Exception in task 1.0 in stage 3.0 (TID 159)
- java.util.NoSuchElementException: None.get
- at scala.None$.get(Option.scala:347)
- at scala.None$.get(Option.scala:345)
- at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(BlockInfoManager.scala:343)
- at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockManager.scala:646)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 17/01/20 09:36:59 ERROR TaskContextImpl: Error in TaskCompletionListener
- java.io.IOException: Filesystem closed
- at org.apache.hadoop.hdfs.DFSClient.checkOpen(DFSClient.java:808)
- at org.apache.hadoop.hdfs.DFSInputStream.close(DFSInputStream.java:710)
- at java.io.FilterInputStream.close(FilterInputStream.java:181)
- at org.apache.hadoop.util.LineReader.close(LineReader.java:150)
- at org.apache.hadoop.mapreduce.lib.input.LineRecordReader.close(LineRecordReader.java:231)
- at org.apache.spark.sql.execution.datasources.RecordReaderIterator.close(RecordReaderIterator.scala:66)
- at org.apache.spark.sql.execution.datasources.HadoopFileLinesReader.close(HadoopFileLinesReader.scala:54)
- at org.apache.spark.sql.execution.datasources.csv.CSVFileFormat$$anonfun$buildReader$1$$anonfun$7$$anonfun$apply$1.apply(CSVFileFormat.scala:116)
- at org.apache.spark.sql.execution.datasources.csv.CSVFileFormat$$anonfun$buildReader$1$$anonfun$7$$anonfun$apply$1.apply(CSVFileFormat.scala:116)
- at org.apache.spark.TaskContext$$anon$1.onTaskCompletion(TaskContext.scala:123)
- at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:97)
- at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:95)
- at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
- at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
- at org.apache.spark.TaskContextImpl.markTaskCompleted(TaskContextImpl.scala:95)
- at org.apache.spark.scheduler.Task.run(Task.scala:99)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 17/01/20 09:36:59 ERROR TaskContextImpl: Error in TaskCompletionListener
- java.lang.IllegalStateException: Block broadcast_5 not found
- at org.apache.spark.storage.BlockInfoManager$$anonfun$1.apply(BlockInfoManager.scala:288)
- at org.apache.spark.storage.BlockInfoManager$$anonfun$1.apply(BlockInfoManager.scala:288)
- at scala.Option.getOrElse(Option.scala:121)
- at org.apache.spark.storage.BlockInfoManager.unlock(BlockInfoManager.scala:287)
- at org.apache.spark.storage.BlockManager.releaseLock(BlockManager.scala:630)
- at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$releaseLock$1.apply(TorrentBroadcast.scala:210)
- at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$releaseLock$1.apply(TorrentBroadcast.scala:210)
- at org.apache.spark.TaskContext$$anon$1.onTaskCompletion(TaskContext.scala:123)
- at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:97)
- at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:95)
- at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
- at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
- at org.apache.spark.TaskContextImpl.markTaskCompleted(TaskContextImpl.scala:95)
- at org.apache.spark.scheduler.Task.run(Task.scala:99)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 17/01/20 09:36:59 ERROR TaskContextImpl: Error in TaskCompletionListener
- java.lang.IllegalStateException: Block broadcast_6 not found
- at org.apache.spark.storage.BlockInfoManager$$anonfun$1.apply(BlockInfoManager.scala:288)
- at org.apache.spark.storage.BlockInfoManager$$anonfun$1.apply(BlockInfoManager.scala:288)
- at scala.Option.getOrElse(Option.scala:121)
- at org.apache.spark.storage.BlockInfoManager.unlock(BlockInfoManager.scala:287)
- at org.apache.spark.storage.BlockManager.releaseLock(BlockManager.scala:630)
- at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$releaseLock$1.apply(TorrentBroadcast.scala:210)
- at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$releaseLock$1.apply(TorrentBroadcast.scala:210)
- at org.apache.spark.TaskContext$$anon$1.onTaskCompletion(TaskContext.scala:123)
- at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:97)
- at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:95)
- at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
- at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
- at org.apache.spark.TaskContextImpl.markTaskCompleted(TaskContextImpl.scala:95)
- at org.apache.spark.scheduler.Task.run(Task.scala:99)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 17/01/20 09:36:59 ERROR Executor: Exception in task 0.0 in stage 3.0 (TID 158)
- java.util.NoSuchElementException: None.get
- at scala.None$.get(Option.scala:347)
- at scala.None$.get(Option.scala:345)
- at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(BlockInfoManager.scala:343)
- at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockManager.scala:646)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 17/01/20 09:36:59 ERROR TaskContextImpl: Error in TaskCompletionListener
- java.io.IOException: Filesystem closed
- at org.apache.hadoop.hdfs.DFSClient.checkOpen(DFSClient.java:808)
- at org.apache.hadoop.hdfs.DFSInputStream.close(DFSInputStream.java:710)
- at java.io.FilterInputStream.close(FilterInputStream.java:181)
- at org.apache.hadoop.util.LineReader.close(LineReader.java:150)
- at org.apache.hadoop.mapreduce.lib.input.LineRecordReader.close(LineRecordReader.java:231)
- at org.apache.spark.sql.execution.datasources.RecordReaderIterator.close(RecordReaderIterator.scala:66)
- at org.apache.spark.sql.execution.datasources.HadoopFileLinesReader.close(HadoopFileLinesReader.scala:54)
- at org.apache.spark.sql.execution.datasources.csv.CSVFileFormat$$anonfun$buildReader$1$$anonfun$7$$anonfun$apply$1.apply(CSVFileFormat.scala:116)
- at org.apache.spark.sql.execution.datasources.csv.CSVFileFormat$$anonfun$buildReader$1$$anonfun$7$$anonfun$apply$1.apply(CSVFileFormat.scala:116)
- at org.apache.spark.TaskContext$$anon$1.onTaskCompletion(TaskContext.scala:123)
- at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:97)
- at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:95)
- at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
- at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
- at org.apache.spark.TaskContextImpl.markTaskCompleted(TaskContextImpl.scala:95)
- at org.apache.spark.scheduler.Task.run(Task.scala:99)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 17/01/20 09:36:59 ERROR TaskContextImpl: Error in TaskCompletionListener
- java.lang.IllegalStateException: Block broadcast_5 not found
- at org.apache.spark.storage.BlockInfoManager$$anonfun$1.apply(BlockInfoManager.scala:288)
- at org.apache.spark.storage.BlockInfoManager$$anonfun$1.apply(BlockInfoManager.scala:288)
- at scala.Option.getOrElse(Option.scala:121)
- at org.apache.spark.storage.BlockInfoManager.unlock(BlockInfoManager.scala:287)
- at org.apache.spark.storage.BlockManager.releaseLock(BlockManager.scala:630)
- at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$releaseLock$1.apply(TorrentBroadcast.scala:210)
- at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$releaseLock$1.apply(TorrentBroadcast.scala:210)
- at org.apache.spark.TaskContext$$anon$1.onTaskCompletion(TaskContext.scala:123)
- at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:97)
- at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:95)
- at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
- at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
- at org.apache.spark.TaskContextImpl.markTaskCompleted(TaskContextImpl.scala:95)
- at org.apache.spark.scheduler.Task.run(Task.scala:99)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 17/01/20 09:36:59 ERROR TaskContextImpl: Error in TaskCompletionListener
- java.lang.IllegalStateException: Block broadcast_6 not found
- at org.apache.spark.storage.BlockInfoManager$$anonfun$1.apply(BlockInfoManager.scala:288)
- at org.apache.spark.storage.BlockInfoManager$$anonfun$1.apply(BlockInfoManager.scala:288)
- at scala.Option.getOrElse(Option.scala:121)
- at org.apache.spark.storage.BlockInfoManager.unlock(BlockInfoManager.scala:287)
- at org.apache.spark.storage.BlockManager.releaseLock(BlockManager.scala:630)
- at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$releaseLock$1.apply(TorrentBroadcast.scala:210)
- at org.apache.spark.broadcast.TorrentBroadcast$$anonfun$org$apache$spark$broadcast$TorrentBroadcast$$releaseLock$1.apply(TorrentBroadcast.scala:210)
- at org.apache.spark.TaskContext$$anon$1.onTaskCompletion(TaskContext.scala:123)
- at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:97)
- at org.apache.spark.TaskContextImpl$$anonfun$markTaskCompleted$1.apply(TaskContextImpl.scala:95)
- at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
- at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
- at org.apache.spark.TaskContextImpl.markTaskCompleted(TaskContextImpl.scala:95)
- at org.apache.spark.scheduler.Task.run(Task.scala:99)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:274)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 17/01/20 09:36:59 ERROR Executor: Exception in task 3.0 in stage 3.0 (TID 161)
- java.util.NoSuchElementException: None.get
- at scala.None$.get(Option.scala:347)
- at scala.None$.get(Option.scala:345)
- at org.apache.spark.storage.BlockInfoManager.releaseAllLocksForTask(BlockInfoManager.scala:343)
- at org.apache.spark.storage.BlockManager.releaseAllLocksForTask(BlockManager.scala:646)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:281)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
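A likely root cause for the repeated java.lang.NullPointerException is CSVTypeCast.castTo handing an empty or missing CSV field to DecimalFormat.parse while casting to a numeric type under an inferred schema; the job only fails at 09:36:58 because the bad row is first touched when the cached data is materialized for the statistics that line 59 of adiel.py turns into a variance. The sketch below is not the original adiel.py: it assumes a DOWNSTREAM_SIZE column (taken from the script's own "DOWNSTREAM_SIZE Statistical Measures" banner) and uses a placeholder input path. It reads the column as a string, casts it explicitly so unparsable or empty values become null instead of crashing the parser, and drops the nulls before aggregating.

    # Minimal PySpark sketch, assuming column name DOWNSTREAM_SIZE and a placeholder path.
    from pyspark.sql import SparkSession
    from pyspark.sql.functions import col, pow, stddev

    spark = SparkSession.builder.appName("downstream-size-stats").getOrCreate()

    # Without inferSchema, every column arrives as a string, so the CSV parser
    # never attempts the numeric cast that throws the NullPointerException.
    df = (spark.read
          .option("header", "true")
          .csv("hdfs:///path/to/input.csv"))   # placeholder path

    # Casting a non-numeric or empty string to double yields null; filter those out.
    clean = (df
             .withColumn("DOWNSTREAM_SIZE", col("DOWNSTREAM_SIZE").cast("double"))
             .filter(col("DOWNSTREAM_SIZE").isNotNull()))

    # Same shape of result as the failing call: stddev squared into a variance.
    stats = (clean.agg(stddev("DOWNSTREAM_SIZE").alias("Stddev"))
                  .withColumn("Variance", pow(col("Stddev"), 2)))

    stats.show(3, False)

Alternatively, keeping the inferred schema but adding .option("nullValue", "") or .option("mode", "DROPMALFORMED") to the read may help, though whether those options intercept this particular null before the cast depends on the Spark 2.0.2 CSV reader's behavior.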