Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- scala> val textFile = sc.textFile("hdfs://192.168.30.157:8020/cervantes/novela/quijote.txt").flatMap(line=>line.split(" ")).map(word=>(word,1)).reduceByKey(_+_).saveAsTextFile("output-quijote")
- 16/04/27 18:59:44 WARN BlockReaderFactory: I/O error constructing remote block reader.
- java.net.ConnectException: Connection refused
- at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
- at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
- at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
- at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
- at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3101)
- at org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:755)
- at org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:670)
- at org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:337)
- at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:576)
- at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
- at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:847)
- at java.io.DataInputStream.read(DataInputStream.java:100)
- at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180)
- at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
- at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
- at org.apache.hadoop.mapred.LineRecordReader.skipUtfByteOrderMark(LineRecordReader.java:206)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:244)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:47)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:246)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:208)
- at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
- at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:189)
- at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:64)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
- at org.apache.spark.scheduler.Task.run(Task.scala:89)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 16/04/27 18:59:44 WARN DFSClient: Failed to connect to /127.0.0.1:50010 for block, add to deadNodes and continue. java.net.ConnectException: Connection refused
- java.net.ConnectException: Connection refused
- at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
- at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
- at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
- at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
- at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3101)
- at org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:755)
- at org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:670)
- at org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:337)
- at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:576)
- at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
- at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:847)
- at java.io.DataInputStream.read(DataInputStream.java:100)
- at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180)
- at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
- at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
- at org.apache.hadoop.mapred.LineRecordReader.skipUtfByteOrderMark(LineRecordReader.java:206)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:244)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:47)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:246)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:208)
- at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
- at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:189)
- at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:64)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
- at org.apache.spark.scheduler.Task.run(Task.scala:89)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 16/04/27 18:59:44 WARN DFSClient: DFS chooseDataNode: got # 1 IOException, will wait for 1306.5790151229824 msec.
- 16/04/27 18:59:44 WARN BlockReaderFactory: I/O error constructing remote block reader.
- java.net.ConnectException: Connection refused
- at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
- at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
- at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
- at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
- at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3101)
- at org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:755)
- at org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:670)
- at org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:337)
- at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:576)
- at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
- at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:847)
- at java.io.DataInputStream.read(DataInputStream.java:100)
- at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180)
- at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
- at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
- at org.apache.hadoop.mapred.LineRecordReader.<init>(LineRecordReader.java:134)
- at org.apache.hadoop.mapred.TextInputFormat.getRecordReader(TextInputFormat.java:67)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.<init>(HadoopRDD.scala:237)
- at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:208)
- at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:101)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
- at org.apache.spark.scheduler.Task.run(Task.scala:89)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 16/04/27 18:59:44 WARN DFSClient: Failed to connect to /127.0.0.1:50010 for block, add to deadNodes and continue. java.net.ConnectException: Connection refused
- java.net.ConnectException: Connection refused
- at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
- at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
- at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
- at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
- at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3101)
- at org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:755)
- at org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:670)
- at org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:337)
- at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:576)
- at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
- at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:847)
- at java.io.DataInputStream.read(DataInputStream.java:100)
- at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180)
- at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
- at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
- at org.apache.hadoop.mapred.LineRecordReader.<init>(LineRecordReader.java:134)
- at org.apache.hadoop.mapred.TextInputFormat.getRecordReader(TextInputFormat.java:67)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.<init>(HadoopRDD.scala:237)
- at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:208)
- at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:101)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
- at org.apache.spark.scheduler.Task.run(Task.scala:89)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 16/04/27 18:59:44 WARN DFSClient: DFS chooseDataNode: got # 1 IOException, will wait for 1316.8481425690604 msec.
- [Stage 6:> (0 + 2) / 2]16/04/27 18:59:45 WARN BlockReaderFactory: I/O error constructing remote block reader.
- java.net.ConnectException: Connection refused
- at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
- at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
- at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
- at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
- at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3101)
- at org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:755)
- at org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:670)
- at org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:337)
- at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:576)
- at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
- at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:847)
- at java.io.DataInputStream.read(DataInputStream.java:100)
- at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180)
- at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
- at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
- at org.apache.hadoop.mapred.LineRecordReader.skipUtfByteOrderMark(LineRecordReader.java:206)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:244)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:47)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:246)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:208)
- at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
- at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:189)
- at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:64)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
- at org.apache.spark.scheduler.Task.run(Task.scala:89)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 16/04/27 18:59:45 WARN DFSClient: Failed to connect to /127.0.0.1:50010 for block, add to deadNodes and continue. java.net.ConnectException: Connection refused
- java.net.ConnectException: Connection refused
- at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
- at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
- at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
- at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
- at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3101)
- at org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:755)
- at org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:670)
- at org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:337)
- at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:576)
- at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
- at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:847)
- at java.io.DataInputStream.read(DataInputStream.java:100)
- at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180)
- at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
- at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
- at org.apache.hadoop.mapred.LineRecordReader.skipUtfByteOrderMark(LineRecordReader.java:206)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:244)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:47)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:246)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:208)
- at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
- at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:189)
- at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:64)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
- at org.apache.spark.scheduler.Task.run(Task.scala:89)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 16/04/27 18:59:45 WARN DFSClient: DFS chooseDataNode: got # 2 IOException, will wait for 5344.531746769184 msec.
- 16/04/27 18:59:45 WARN BlockReaderFactory: I/O error constructing remote block reader.
- java.net.ConnectException: Connection refused
- at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
- at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
- at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
- at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
- at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3101)
- at org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:755)
- at org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:670)
- at org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:337)
- at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:576)
- at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
- at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:847)
- at java.io.DataInputStream.read(DataInputStream.java:100)
- at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180)
- at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
- at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
- at org.apache.hadoop.mapred.LineRecordReader.<init>(LineRecordReader.java:134)
- at org.apache.hadoop.mapred.TextInputFormat.getRecordReader(TextInputFormat.java:67)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.<init>(HadoopRDD.scala:237)
- at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:208)
- at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:101)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
- at org.apache.spark.scheduler.Task.run(Task.scala:89)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 16/04/27 18:59:45 WARN DFSClient: Failed to connect to /127.0.0.1:50010 for block, add to deadNodes and continue. java.net.ConnectException: Connection refused
- java.net.ConnectException: Connection refused
- at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
- at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
- at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
- at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
- at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3101)
- at org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:755)
- at org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:670)
- at org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:337)
- at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:576)
- at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
- at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:847)
- at java.io.DataInputStream.read(DataInputStream.java:100)
- at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180)
- at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
- at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
- at org.apache.hadoop.mapred.LineRecordReader.<init>(LineRecordReader.java:134)
- at org.apache.hadoop.mapred.TextInputFormat.getRecordReader(TextInputFormat.java:67)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.<init>(HadoopRDD.scala:237)
- at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:208)
- at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:101)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
- at org.apache.spark.scheduler.Task.run(Task.scala:89)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 16/04/27 18:59:45 WARN DFSClient: DFS chooseDataNode: got # 2 IOException, will wait for 3568.022412691614 msec.
- 16/04/27 18:59:49 WARN BlockReaderFactory: I/O error constructing remote block reader.
- java.net.ConnectException: Connection refused
- at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
- at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
- at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
- at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
- at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3101)
- at org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:755)
- at org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:670)
- at org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:337)
- at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:576)
- at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
- at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:847)
- at java.io.DataInputStream.read(DataInputStream.java:100)
- at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180)
- at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
- at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
- at org.apache.hadoop.mapred.LineRecordReader.<init>(LineRecordReader.java:134)
- at org.apache.hadoop.mapred.TextInputFormat.getRecordReader(TextInputFormat.java:67)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.<init>(HadoopRDD.scala:237)
- at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:208)
- at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:101)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
- at org.apache.spark.scheduler.Task.run(Task.scala:89)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 16/04/27 18:59:49 WARN DFSClient: Failed to connect to /127.0.0.1:50010 for block, add to deadNodes and continue. java.net.ConnectException: Connection refused
- java.net.ConnectException: Connection refused
- at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
- at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
- at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
- at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
- at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3101)
- at org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:755)
- at org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:670)
- at org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:337)
- at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:576)
- at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
- at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:847)
- at java.io.DataInputStream.read(DataInputStream.java:100)
- at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180)
- at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
- at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
- at org.apache.hadoop.mapred.LineRecordReader.<init>(LineRecordReader.java:134)
- at org.apache.hadoop.mapred.TextInputFormat.getRecordReader(TextInputFormat.java:67)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.<init>(HadoopRDD.scala:237)
- at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:208)
- at org.apache.spark.rdd.HadoopRDD.compute(HadoopRDD.scala:101)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
- at org.apache.spark.scheduler.Task.run(Task.scala:89)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 16/04/27 18:59:49 WARN DFSClient: DFS chooseDataNode: got # 3 IOException, will wait for 13606.690016648003 msec.
- 16/04/27 18:59:51 WARN BlockReaderFactory: I/O error constructing remote block reader.
- java.net.ConnectException: Connection refused
- at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
- at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
- at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
- at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
- at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3101)
- at org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:755)
- at org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:670)
- at org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:337)
- at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:576)
- at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
- at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:847)
- at java.io.DataInputStream.read(DataInputStream.java:100)
- at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180)
- at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
- at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
- at org.apache.hadoop.mapred.LineRecordReader.skipUtfByteOrderMark(LineRecordReader.java:206)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:244)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:47)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:246)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:208)
- at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
- at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:189)
- at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:64)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
- at org.apache.spark.scheduler.Task.run(Task.scala:89)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 16/04/27 18:59:51 WARN DFSClient: Failed to connect to /127.0.0.1:50010 for block, add to deadNodes and continue. java.net.ConnectException: Connection refused
- java.net.ConnectException: Connection refused
- at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
- at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
- at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
- at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
- at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3101)
- at org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:755)
- at org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:670)
- at org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:337)
- at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:576)
- at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
- at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:847)
- at java.io.DataInputStream.read(DataInputStream.java:100)
- at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180)
- at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
- at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
- at org.apache.hadoop.mapred.LineRecordReader.skipUtfByteOrderMark(LineRecordReader.java:206)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:244)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:47)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:246)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:208)
- at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
- at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:189)
- at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:64)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
- at org.apache.spark.scheduler.Task.run(Task.scala:89)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 16/04/27 18:59:51 WARN DFSClient: DFS chooseDataNode: got # 3 IOException, will wait for 9560.09839904081 msec.
- 16/04/27 19:00:00 WARN BlockReaderFactory: I/O error constructing remote block reader.
- java.net.ConnectException: Connection refused
- at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
- at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
- at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
- at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
- at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3101)
- at org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:755)
- at org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:670)
- at org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:337)
- at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:576)
- at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
- at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:847)
- at java.io.DataInputStream.read(DataInputStream.java:100)
- at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180)
- at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
- at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
- at org.apache.hadoop.mapred.LineRecordReader.skipUtfByteOrderMark(LineRecordReader.java:206)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:244)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:47)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:246)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:208)
- at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
- at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:189)
- at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:64)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
- at org.apache.spark.scheduler.Task.run(Task.scala:89)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 16/04/27 19:00:00 WARN DFSClient: Failed to connect to /127.0.0.1:50010 for block, add to deadNodes and continue. java.net.ConnectException: Connection refused
- java.net.ConnectException: Connection refused
- at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
- at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:717)
- at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:206)
- at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:530)
- at org.apache.hadoop.hdfs.DFSClient.newConnectedPeer(DFSClient.java:3101)
- at org.apache.hadoop.hdfs.BlockReaderFactory.nextTcpPeer(BlockReaderFactory.java:755)
- at org.apache.hadoop.hdfs.BlockReaderFactory.getRemoteBlockReaderFromTcp(BlockReaderFactory.java:670)
- at org.apache.hadoop.hdfs.BlockReaderFactory.build(BlockReaderFactory.java:337)
- at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:576)
- at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
- at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:847)
- at java.io.DataInputStream.read(DataInputStream.java:100)
- at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180)
- at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
- at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
- at org.apache.hadoop.mapred.LineRecordReader.skipUtfByteOrderMark(LineRecordReader.java:206)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:244)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:47)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:246)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:208)
- at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
- at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:189)
- at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:64)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
- at org.apache.spark.scheduler.Task.run(Task.scala:89)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 16/04/27 19:00:00 WARN DFSClient: Could not obtain block: BP-286282631-127.0.0.1-1433865208026:blk_1073743637_2859 file=/cervantes/novela/quijote.txt No live nodes contain current block Block locations: 127.0.0.1:50010 Dead nodes: 127.0.0.1:50010. Throwing a BlockMissingException
- 16/04/27 19:00:00 WARN DFSClient: Could not obtain block: BP-286282631-127.0.0.1-1433865208026:blk_1073743637_2859 file=/cervantes/novela/quijote.txt No live nodes contain current block Block locations: 127.0.0.1:50010 Dead nodes: 127.0.0.1:50010. Throwing a BlockMissingException
- 16/04/27 19:00:00 WARN DFSClient: DFS Read
- org.apache.hadoop.hdfs.BlockMissingException: Could not obtain block: BP-286282631-127.0.0.1-1433865208026:blk_1073743637_2859 file=/cervantes/novela/quijote.txt
- at org.apache.hadoop.hdfs.DFSInputStream.chooseDataNode(DFSInputStream.java:888)
- at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:568)
- at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
- at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:847)
- at java.io.DataInputStream.read(DataInputStream.java:100)
- at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180)
- at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
- at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
- at org.apache.hadoop.mapred.LineRecordReader.skipUtfByteOrderMark(LineRecordReader.java:206)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:244)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:47)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:246)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:208)
- at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
- at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:189)
- at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:64)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
- at org.apache.spark.scheduler.Task.run(Task.scala:89)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 16/04/27 19:00:00 ERROR Executor: Exception in task 0.0 in stage 6.0 (TID 15)
- org.apache.hadoop.hdfs.BlockMissingException: Could not obtain block: BP-286282631-127.0.0.1-1433865208026:blk_1073743637_2859 file=/cervantes/novela/quijote.txt
- at org.apache.hadoop.hdfs.DFSInputStream.chooseDataNode(DFSInputStream.java:888)
- at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:568)
- at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
- at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:847)
- at java.io.DataInputStream.read(DataInputStream.java:100)
- at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180)
- at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
- at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
- at org.apache.hadoop.mapred.LineRecordReader.skipUtfByteOrderMark(LineRecordReader.java:206)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:244)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:47)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:246)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:208)
- at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
- at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:189)
- at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:64)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
- at org.apache.spark.scheduler.Task.run(Task.scala:89)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 16/04/27 19:00:00 WARN TaskSetManager: Lost task 0.0 in stage 6.0 (TID 15, localhost): org.apache.hadoop.hdfs.BlockMissingException: Could not obtain block: BP-286282631-127.0.0.1-1433865208026:blk_1073743637_2859 file=/cervantes/novela/quijote.txt
- at org.apache.hadoop.hdfs.DFSInputStream.chooseDataNode(DFSInputStream.java:888)
- at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:568)
- at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
- at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:847)
- at java.io.DataInputStream.read(DataInputStream.java:100)
- at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180)
- at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
- at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
- at org.apache.hadoop.mapred.LineRecordReader.skipUtfByteOrderMark(LineRecordReader.java:206)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:244)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:47)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:246)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:208)
- at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
- at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:189)
- at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:64)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
- at org.apache.spark.scheduler.Task.run(Task.scala:89)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- 16/04/27 19:00:00 ERROR TaskSetManager: Task 0 in stage 6.0 failed 1 times; aborting job
- org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 6.0 failed 1 times, most recent failure: Lost task 0.0 in stage 6.0 (TID 15, localhost): org.apache.hadoop.hdfs.BlockMissingException: Could not obtain block: BP-286282631-127.0.0.1-1433865208026:blk_1073743637_2859 file=/cervantes/novela/quijote.txt
- at org.apache.hadoop.hdfs.DFSInputStream.chooseDataNode(DFSInputStream.java:888)
- at org.apache.hadoop.hdfs.DFSInputStream.blockSeekTo(DFSInputStream.java:568)
- at org.apache.hadoop.hdfs.DFSInputStream.readWithStrategy(DFSInputStream.java:800)
- at org.apache.hadoop.hdfs.DFSInputStream.read(DFSInputStream.java:847)
- at java.io.DataInputStream.read(DataInputStream.java:100)
- at org.apache.hadoop.util.LineReader.fillBuffer(LineReader.java:180)
- at org.apache.hadoop.util.LineReader.readDefaultLine(LineReader.java:216)
- at org.apache.hadoop.util.LineReader.readLine(LineReader.java:174)
- at org.apache.hadoop.mapred.LineRecordReader.skipUtfByteOrderMark(LineRecordReader.java:206)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:244)
- at org.apache.hadoop.mapred.LineRecordReader.next(LineRecordReader.java:47)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:246)
- at org.apache.spark.rdd.HadoopRDD$$anon$1.getNext(HadoopRDD.scala:208)
- at org.apache.spark.util.NextIterator.hasNext(NextIterator.scala:73)
- at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:39)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
- at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
- at org.apache.spark.util.collection.ExternalSorter.insertAll(ExternalSorter.scala:189)
- at org.apache.spark.shuffle.sort.SortShuffleWriter.write(SortShuffleWriter.scala:64)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:73)
- at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:41)
- at org.apache.spark.scheduler.Task.run(Task.scala:89)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- Driver stacktrace:
- at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
- at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
- at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
- at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
- at scala.Option.foreach(Option.scala:236)
- at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
- at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
- at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
- at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
- at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
- at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
- at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
- at org.apache.spark.SparkContext.runJob(SparkContext.scala:1845)
- at org.apache.spark.SparkContext.runJob(SparkContext.scala:1922)
- at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1213)
- at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1156)
- at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopDataset$1.apply(PairRDDFunctions.scala:1156)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
- at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
- at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopDataset(PairRDDFunctions.scala:1156)
- at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply$mcV$sp(PairRDDFunctions.scala:1060)
- at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:1026)
- at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$4.apply(PairRDDFunctions.scala:1026)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
- at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
- at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:1026)
- at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply$mcV$sp(PairRDDFunctions.scala:952)
- at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:952)
- at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsHadoopFile$1.apply(PairRDDFunctions.scala:952)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
- at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
- at org.apache.spark.rdd.PairRDDFunctions.saveAsHadoopFile(PairRDDFunctions.scala:951)
- at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply$mcV$sp(RDD.scala:1443)
- at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1422)
- at org.apache.spark.rdd.RDD$$anonfun$saveAsTextFile$1.apply(RDD.scala:1422)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
- at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
- at org.apache.spark.rdd.RDD.saveAsTextFile(RDD.scala:1422)
- at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:27)
- at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:32)
- at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:34)
- at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:36)
- at $iwC$$iwC$$iwC$$iwC.<init>(<console>:38)
- at $iwC$$iwC$$iwC.<init>(<console>:40)
- at $iwC$$iwC.<init>(<console>:42)
- at $iwC.<init>(<console>:44)
- at <init>(<console>:46)
- at .<init>(<console>:50)
- at .<clinit>(<console>)
- at .<init>(<console>:7)
- at .<clinit>(<console>)
- at $print(<console>)
- at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
- at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
- at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
- at java.lang.reflect.Method.invoke(Method.java:498)
- at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
- at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346)
- at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
- at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
- at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
- at org.apache.spark.repl.SparkILoop.reallyInterpret$1(SparkILoop.scala:857)
- at org.apache.spark.repl.SparkILoop.interpretStartingWith(SparkILoop.scala:902)
- at org.apache.spark.repl.SparkILoop.command(SparkILoop.scala:814)
- at org.apache.spark.repl.SparkILoop.processLine$1(SparkILoop.scala:657)
- at org.apache.spark.repl.SparkILoop.innerLoop$1(SparkILoop.scala:665)
- at org.apache.spark.repl.SparkILoop.org$apache$spark$repl$SparkILoop$$loop(SparkILoop.scala:670)
- at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop$$process$1.apply$mcZ$sp(SparkILoop.scala:997)
- at org.apache.spark.repl.SparkILoop$$anonfun$org$apache$spark$repl$SparkILoop
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement