Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- 2017-07-05 12:27:56,372 [task-result-getter-0] WARN TaskSetManager - Lost task 0.0 in stage 0.0 (TID 0, 172.18.26.77, executor 0): java.lang.OutOfMemoryError: Requested array size exceeds VM limit
- at java.lang.StringCoding$StringEncoder.encode(StringCoding.java:300)
- at java.lang.StringCoding.encode(StringCoding.java:344)
- at java.lang.String.getBytes(String.java:918)
- at org.sfsu.spark.CluewebReader$.getWarcRecordsFromString(CluewebReader.scala:315)
- at org.sfsu.spark.CluewebReader$$anonfun$getWarcRecordsFromDirectory$1.apply(CluewebReader.scala:305)
- at org.sfsu.spark.CluewebReader$$anonfun$getWarcRecordsFromDirectory$1.apply(CluewebReader.scala:305)
- at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
- at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
- at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:461)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:215)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1005)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:996)
- at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:936)
- at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:996)
- at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:700)
- at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
- at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
- at org.apache.spark.scheduler.Task.run(Task.scala:99)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:748)
- 2017-07-05 12:27:57,144 [dispatcher-event-loop-1] ERROR TaskSchedulerImpl - Lost executor 0 on 172.18.26.77: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
- 2017-07-05 12:27:57,150 [dispatcher-event-loop-1] WARN TaskSetManager - Lost task 0.1 in stage 0.0 (TID 1, 172.18.26.77, executor 0): ExecutorLostFailure (executor 0 exited caused by one of the running tasks) Reason: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
- 2017-07-05 12:28:09,245 [task-result-getter-1] WARN TaskSetManager - Lost task 0.2 in stage 0.0 (TID 2, 172.18.26.77, executor 1): java.lang.OutOfMemoryError: Requested array size exceeds VM limit
- at java.lang.StringCoding$StringEncoder.encode(StringCoding.java:300)
- at java.lang.StringCoding.encode(StringCoding.java:344)
- at java.lang.String.getBytes(String.java:918)
- at org.sfsu.spark.CluewebReader$.getWarcRecordsFromString(CluewebReader.scala:315)
- at org.sfsu.spark.CluewebReader$$anonfun$getWarcRecordsFromDirectory$1.apply(CluewebReader.scala:305)
- at org.sfsu.spark.CluewebReader$$anonfun$getWarcRecordsFromDirectory$1.apply(CluewebReader.scala:305)
- at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
- at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
- at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:461)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:215)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1005)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:996)
- at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:936)
- at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:996)
- at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:700)
- at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
- at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
- at org.apache.spark.scheduler.Task.run(Task.scala:99)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:748)
- 2017-07-05 12:28:10,199 [dispatcher-event-loop-3] ERROR TaskSchedulerImpl - Lost executor 1 on 172.18.26.77: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
- 2017-07-05 12:28:10,200 [dispatcher-event-loop-3] WARN TaskSetManager - Lost task 0.3 in stage 0.0 (TID 3, 172.18.26.77, executor 1): ExecutorLostFailure (executor 1 exited caused by one of the running tasks) Reason: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
- 2017-07-05 12:28:28,962 [task-result-getter-2] WARN TaskSetManager - Lost task 0.4 in stage 0.0 (TID 4, 172.18.26.77, executor 2): java.lang.OutOfMemoryError: Requested array size exceeds VM limit
- at java.lang.StringCoding$StringEncoder.encode(StringCoding.java:300)
- at java.lang.StringCoding.encode(StringCoding.java:344)
- at java.lang.String.getBytes(String.java:918)
- at org.sfsu.spark.CluewebReader$.getWarcRecordsFromString(CluewebReader.scala:315)
- at org.sfsu.spark.CluewebReader$$anonfun$getWarcRecordsFromDirectory$1.apply(CluewebReader.scala:305)
- at org.sfsu.spark.CluewebReader$$anonfun$getWarcRecordsFromDirectory$1.apply(CluewebReader.scala:305)
- at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
- at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
- at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:461)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:215)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1005)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:996)
- at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:936)
- at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:996)
- at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:700)
- at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
- at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
- at org.apache.spark.scheduler.Task.run(Task.scala:99)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:748)
- 2017-07-05 12:28:30,366 [dispatcher-event-loop-1] ERROR TaskSchedulerImpl - Lost executor 2 on 172.18.26.77: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
- 2017-07-05 12:28:30,366 [dispatcher-event-loop-1] WARN TaskSetManager - Lost task 0.5 in stage 0.0 (TID 5, 172.18.26.77, executor 2): ExecutorLostFailure (executor 2 exited caused by one of the running tasks) Reason: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
- 2017-07-05 12:28:46,695 [task-result-getter-3] WARN TaskSetManager - Lost task 0.6 in stage 0.0 (TID 6, 172.18.26.77, executor 3): java.lang.OutOfMemoryError: Requested array size exceeds VM limit
- at java.lang.StringCoding$StringEncoder.encode(StringCoding.java:300)
- at java.lang.StringCoding.encode(StringCoding.java:344)
- at java.lang.String.getBytes(String.java:918)
- at org.sfsu.spark.CluewebReader$.getWarcRecordsFromString(CluewebReader.scala:315)
- at org.sfsu.spark.CluewebReader$$anonfun$getWarcRecordsFromDirectory$1.apply(CluewebReader.scala:305)
- at org.sfsu.spark.CluewebReader$$anonfun$getWarcRecordsFromDirectory$1.apply(CluewebReader.scala:305)
- at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
- at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
- at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:461)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:215)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1005)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:996)
- at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:936)
- at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:996)
- at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:700)
- at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
- at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
- at org.apache.spark.scheduler.Task.run(Task.scala:99)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:748)
- 2017-07-05 12:28:47,510 [dispatcher-event-loop-2] ERROR TaskSchedulerImpl - Lost executor 3 on 172.18.26.77: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
- 2017-07-05 12:28:47,510 [dispatcher-event-loop-2] WARN TaskSetManager - Lost task 0.7 in stage 0.0 (TID 7, 172.18.26.77, executor 3): ExecutorLostFailure (executor 3 exited caused by one of the running tasks) Reason: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
- 2017-07-05 12:29:01,034 [task-result-getter-0] WARN TaskSetManager - Lost task 0.8 in stage 0.0 (TID 8, 172.18.26.77, executor 4): java.lang.OutOfMemoryError: Requested array size exceeds VM limit
- at java.lang.StringCoding$StringEncoder.encode(StringCoding.java:300)
- at java.lang.StringCoding.encode(StringCoding.java:344)
- at java.lang.String.getBytes(String.java:918)
- at org.sfsu.spark.CluewebReader$.getWarcRecordsFromString(CluewebReader.scala:315)
- at org.sfsu.spark.CluewebReader$$anonfun$getWarcRecordsFromDirectory$1.apply(CluewebReader.scala:305)
- at org.sfsu.spark.CluewebReader$$anonfun$getWarcRecordsFromDirectory$1.apply(CluewebReader.scala:305)
- at scala.collection.Iterator$$anon$12.nextCur(Iterator.scala:434)
- at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
- at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:461)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
- at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:215)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1005)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:996)
- at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:936)
- at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:996)
- at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:700)
- at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:334)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:285)
- at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
- at org.apache.spark.scheduler.Task.run(Task.scala:99)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:322)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:748)
- 2017-07-05 12:29:01,849 [dispatcher-event-loop-6] ERROR TaskSchedulerImpl - Lost executor 4 on 172.18.26.77: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
- 2017-07-05 12:29:01,849 [dispatcher-event-loop-6] WARN TaskSetManager - Lost task 0.9 in stage 0.0 (TID 9, 172.18.26.77, executor 4): ExecutorLostFailure (executor 4 exited caused by one of the running tasks) Reason: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
- 2017-07-05 12:29:01,850 [dispatcher-event-loop-6] ERROR TaskSetManager - Task 0 in stage 0.0 failed 10 times; aborting job
- Exception in thread "main" org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 10 times, most recent failure: Lost task 0.9 in stage 0.0 (TID 9, 172.18.26.77, executor 4): ExecutorLostFailure (executor 4 exited caused by one of the running tasks) Reason: Remote RPC client disassociated. Likely due to containers exceeding thresholds, or network issues. Check driver logs for WARN messages.
- Driver stacktrace:
- at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1435)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1423)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1422)
- at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
- at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
- at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1422)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
- at scala.Option.foreach(Option.scala:257)
- at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:802)
- at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650)
- at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605)
- at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594)
- at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
- at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628)
- at org.apache.spark.SparkContext.runJob(SparkContext.scala:1925)
- at org.apache.spark.SparkContext.runJob(SparkContext.scala:1938)
- at org.apache.spark.SparkContext.runJob(SparkContext.scala:1951)
- at org.apache.spark.SparkContext.runJob(SparkContext.scala:1965)
- at org.apache.spark.rdd.RDD.count(RDD.scala:1158)
- at org.sfsu.spark.CluewebReader$.kmeanForClueWeb09WarcFiles(CluewebReader.scala:98)
- at org.sfsu.spark.SClusterDocsIndexToSolr.run(SClusterDocsIndexToSolr.scala:175)
- at com.lucidworks.spark.SparkApp.main(SparkApp.java:83)
- at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
- at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
- at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
- at java.lang.reflect.Method.invoke(Method.java:498)
- at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:743)
- at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:187)
- at org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:212)
- at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:126)
- at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement