from pyspark import SparkContext

# Running against a local master works:
sc = SparkContext('local', "TDMS parser")

# Pointing the same job at the standalone cluster fails:
sc = SparkContext('spark://roman-pc:7077', "TDMS parser")
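For context, the driver code implied by the traceback below boils down to something like this sketch. Only the collect call at tdms_reader.py:461 is verbatim from the log; the body of read_file, its pandas usage, and the input list are assumptions:

def read_file(path):
    # Hypothetical stand-in for the real TDMS reader. The real function
    # evidently references pandas, which is why cloudpickle ships a
    # subimport('pandas') hook along with the lambda below.
    import pandas
    return pandas.DataFrame({'path': [path]})

rdd = sc.parallelize(["/data/one.tdms", "/data/two.tdms"])  # assumed input
rdd.map(lambda f: read_file(f)).collect()  # tdms_reader.py:461 (verbatim)

In local mode this runs to completion; against spark://roman-pc:7077 it aborts with the log below: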
15/07/03 16:36:20 WARN TaskSetManager: Lost task 0.0 in stage 0.0 (TID 0, 192.168.0.193): org.apache.spark.api.python.PythonException:
Traceback (most recent call last):
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/worker.py", line 98, in main
    command = pickleSer._read_with_length(infile)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/serializers.py", line 164, in _read_with_length
    return self.loads(obj)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/serializers.py", line 421, in loads
    return pickle.loads(obj)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 629, in subimport
    __import__(name)
ImportError: ('No module named pandas', <function subimport at 0x7fef3731cd70>, ('pandas',))

    at org.apache.spark.api.python.PythonRDD$$anon$1.read(PythonRDD.scala:138)
    at org.apache.spark.api.python.PythonRDD$$anon$1.<init>(PythonRDD.scala:179)
    at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:97)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
    at org.apache.spark.scheduler.Task.run(Task.scala:70)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)

15/07/03 16:36:20 INFO TaskSetManager: Lost task 1.0 in stage 0.0 (TID 1) on executor 192.168.0.193: org.apache.spark.api.python.PythonException (Traceback (most recent call last):
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/worker.py", line 98, in main
    command = pickleSer._read_with_length(infile)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/serializers.py", line 164, in _read_with_length
    return self.loads(obj)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/serializers.py", line 421, in loads
    return pickle.loads(obj)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 629, in subimport
    __import__(name)
ImportError: ('No module named pandas', <function subimport at 0x7fef3731cd70>, ('pandas',)) ) [duplicate 1]
15/07/03 16:36:20 INFO TaskSetManager: Starting task 1.1 in stage 0.0 (TID 2, 192.168.0.193, PROCESS_LOCAL, 1491 bytes)
15/07/03 16:36:20 INFO TaskSetManager: Starting task 0.1 in stage 0.0 (TID 3, 192.168.0.193, PROCESS_LOCAL, 1412 bytes)
15/07/03 16:36:20 INFO TaskSetManager: Lost task 0.1 in stage 0.0 (TID 3) on executor 192.168.0.193: org.apache.spark.api.python.PythonException (Traceback (most recent call last):
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/worker.py", line 98, in main
    command = pickleSer._read_with_length(infile)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/serializers.py", line 164, in _read_with_length
    return self.loads(obj)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/serializers.py", line 421, in loads
    return pickle.loads(obj)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 629, in subimport
    __import__(name)
ImportError: ('No module named pandas', <function subimport at 0x7fef3731cd70>, ('pandas',)) ) [duplicate 2]
15/07/03 16:36:20 INFO TaskSetManager: Starting task 0.2 in stage 0.0 (TID 4, 192.168.0.193, PROCESS_LOCAL, 1412 bytes)
15/07/03 16:36:21 INFO BlockManagerInfo: Added broadcast_0_piece0 in memory on 192.168.0.193:40099 (size: 13.7 KB, free: 265.4 MB)
15/07/03 16:36:23 WARN TaskSetManager: Lost task 1.1 in stage 0.0 (TID 2, 192.168.0.193): org.apache.spark.api.python.PythonException:
Traceback (most recent call last):
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/worker.py", line 98, in main
    command = pickleSer._read_with_length(infile)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/serializers.py", line 164, in _read_with_length
    return self.loads(obj)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/serializers.py", line 421, in loads
    return pickle.loads(obj)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 629, in subimport
    __import__(name)
ImportError: ('No module named pandas', <function subimport at 0x7fb5c3d5cd70>, ('pandas',))

    at org.apache.spark.api.python.PythonRDD$$anon$1.read(PythonRDD.scala:138)
    at org.apache.spark.api.python.PythonRDD$$anon$1.<init>(PythonRDD.scala:179)
    at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:97)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
    at org.apache.spark.scheduler.Task.run(Task.scala:70)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)

15/07/03 16:36:23 INFO TaskSetManager: Starting task 1.2 in stage 0.0 (TID 5, 192.168.0.193, PROCESS_LOCAL, 1491 bytes)
15/07/03 16:36:23 INFO TaskSetManager: Lost task 0.2 in stage 0.0 (TID 4) on executor 192.168.0.193: org.apache.spark.api.python.PythonException (Traceback (most recent call last):
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/worker.py", line 98, in main
    command = pickleSer._read_with_length(infile)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/serializers.py", line 164, in _read_with_length
    return self.loads(obj)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/serializers.py", line 421, in loads
    return pickle.loads(obj)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 629, in subimport
    __import__(name)
ImportError: ('No module named pandas', <function subimport at 0x7fb5c3d5cd70>, ('pandas',)) ) [duplicate 1]
15/07/03 16:36:23 INFO TaskSetManager: Starting task 0.3 in stage 0.0 (TID 6, 192.168.0.193, PROCESS_LOCAL, 1412 bytes)
15/07/03 16:36:23 INFO TaskSetManager: Lost task 0.3 in stage 0.0 (TID 6) on executor 192.168.0.193: org.apache.spark.api.python.PythonException (Traceback (most recent call last):
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/worker.py", line 98, in main
    command = pickleSer._read_with_length(infile)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/serializers.py", line 164, in _read_with_length
    return self.loads(obj)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/serializers.py", line 421, in loads
    return pickle.loads(obj)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 629, in subimport
    __import__(name)
ImportError: ('No module named pandas', <function subimport at 0x7fef3731cd70>, ('pandas',)) ) [duplicate 3]
15/07/03 16:36:23 ERROR TaskSetManager: Task 0 in stage 0.0 failed 4 times; aborting job
15/07/03 16:36:23 INFO TaskSchedulerImpl: Cancelling stage 0
15/07/03 16:36:23 INFO TaskSchedulerImpl: Stage 0 was cancelled
15/07/03 16:36:23 INFO DAGScheduler: ResultStage 0 (collect at /home/roman/dev/python/AWO-72/tdms_reader.py:461) failed in 16,581 s
15/07/03 16:36:23 INFO DAGScheduler: Job 0 failed: collect at /home/roman/dev/python/AWO-72/tdms_reader.py:461, took 17,456362 s
Traceback (most recent call last):
  File "/home/roman/dev/python/AWO-72/tdms_reader.py", line 461, in <module>
    rdd.map(lambda f: read_file(f)).collect()
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/rdd.py", line 745, in collect
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/py4j-0.8.2.1-src.zip/py4j/java_gateway.py", line 538, in __call__
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/py4j-0.8.2.1-src.zip/py4j/protocol.py", line 300, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 6, 192.168.0.193): org.apache.spark.api.python.PythonException:
Traceback (most recent call last):
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/worker.py", line 98, in main
    command = pickleSer._read_with_length(infile)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/serializers.py", line 164, in _read_with_length
    return self.loads(obj)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/serializers.py", line 421, in loads
    return pickle.loads(obj)
  File "/home/roman/dev/spark-1.4.0-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/cloudpickle.py", line 629, in subimport
    __import__(name)
ImportError: ('No module named pandas', <function subimport at 0x7fef3731cd70>, ('pandas',))

    at org.apache.spark.api.python.PythonRDD$$anon$1.read(PythonRDD.scala:138)
    at org.apache.spark.api.python.PythonRDD$$anon$1.<init>(PythonRDD.scala:179)
    at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:97)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:277)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:244)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:63)
    at org.apache.spark.scheduler.Task.run(Task.scala:70)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
    at java.lang.Thread.run(Thread.java:745)

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1266)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1257)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1256)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1256)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:730)
    at scala.Option.foreach(Option.scala:236)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:730)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1450)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1411)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
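The repeated ImportError is raised by the worker-side Python, not the driver: cloudpickle serialized the mapped function together with a subimport('pandas') hook, and the interpreter the executor on 192.168.0.193 launches cannot import pandas, so every task attempt dies while deserializing the function (four attempts for task 0, then the job aborts). A quick probe along these lines (not part of the original paste; the app name and partition count are arbitrary) would show which interpreter each worker uses and whether pandas is importable there:

from pyspark import SparkContext

def probe(_):
    # Runs on an executor: report its interpreter path and pandas status.
    import sys
    try:
        import pandas
        status = pandas.__version__
    except ImportError as exc:
        status = "missing (%s)" % exc
    return (sys.executable, status)

sc = SparkContext('spark://roman-pc:7077', "pandas probe")
print(sc.parallelize(range(4), 4).map(probe).collect())

If the probe reports pandas as missing, installing it into that interpreter on every worker node should clear the error (pure-Python modules could alternatively be shipped with sc.addPyFile, but pandas' compiled extensions make a worker-side install the usual route). Local mode never hits the problem because tasks run in the driver's own Python, where pandas is available.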