Stacktrace
---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
<ipython-input-30-46176c96d218> in <module>()
----> 1 series[2].toarray()

/Users/Henry/miniconda/envs/thunder1_env/lib/python2.7/site-packages/thunder/base.pyc in toarray(self)
    262 This will be slow for large datasets, and may exhaust the available memory on the driver.
    263 """
--> 264 return asarray(self.values).squeeze()
    265
    266 def tospark(self):

/Users/Henry/miniconda/envs/thunder1_env/lib/python2.7/site-packages/numpy/core/numeric.pyc in asarray(a, dtype, order)
    480
    481 """
--> 482 return array(a, dtype, copy=False, order=order)
    483
    484 def asanyarray(a, dtype=None, order=None):

/Users/Henry/miniconda/envs/thunder1_env/lib/python2.7/site-packages/bolt/spark/array.pyc in __array__(self)
    33
    34 def __array__(self):
---> 35 return self.toarray()
    36
    37 def cache(self):

/Users/Henry/miniconda/envs/thunder1_env/lib/python2.7/site-packages/bolt/spark/array.pyc in toarray(self)
    984 """
    985 rdd = self._rdd if self._ordered else self._rdd.sortByKey()
--> 986 x = rdd.values().collect()
    987 return asarray(x).reshape(self.shape)
    988

/Applications/spark-1.6.1-bin-hadoop2.6/python/pyspark/rdd.pyc in collect(self)
    769 """
    770 with SCCallSiteSync(self.context) as css:
--> 771 port = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
    772 return list(_load_from_socket(port, self._jrdd_deserializer))
    773

/Applications/spark-1.6.1-bin-hadoop2.6/python/lib/py4j-0.9-src.zip/py4j/java_gateway.py in __call__(self, *args)
    811 answer = self.gateway_client.send_command(command)
    812 return_value = get_return_value(
--> 813 answer, self.gateway_client, self.target_id, self.name)
    814
    815 for temp_arg in temp_args:

/Applications/spark-1.6.1-bin-hadoop2.6/python/lib/py4j-0.9-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
    306 raise Py4JJavaError(
    307 "An error occurred while calling {0}{1}{2}.\n".
--> 308 format(target_id, ".", name), value)
    309 else:
    310 raise Py4JError(

Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 33.0 failed 1 times, most recent failure: Lost task 0.0 in stage 33.0 (TID 92, localhost): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/Applications/spark-1.6.1-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/worker.py", line 111, in main
    process()
  File "/Applications/spark-1.6.1-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/worker.py", line 106, in process
    serializer.dump_stream(func(split_index, iterator), outfile)
  File "/Applications/spark-1.6.1-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/serializers.py", line 263, in dump_stream
    vs = list(itertools.islice(iterator, batch))
  File "/Users/Henry/miniconda/envs/thunder1_env/lib/python2.7/site-packages/bolt/spark/array.py", line 482, in <lambda>
    filtered = self._rdd.filter(lambda kv: key_check(kv[0]))
  File "/Users/Henry/miniconda/envs/thunder1_env/lib/python2.7/site-packages/bolt/spark/array.py", line 476, in key_check
    out = [check(k, s) for k, s in zip(key, key_slices)]
TypeError: zip argument #1 must support iteration

    at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRDD.scala:166)
    at org.apache.spark.api.python.PythonRunner$$anon$1.<init>(PythonRDD.scala:207)
    at org.apache.spark.api.python.PythonRunner.compute(PythonRDD.scala:125)
    at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:70)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
    at org.apache.spark.scheduler.Task.run(Task.scala:89)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1431)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1419)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1418)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1418)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:799)
    at scala.Option.foreach(Option.scala:236)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:799)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1640)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1599)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1588)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:620)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1832)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1845)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1858)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1929)
    at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:927)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:316)
    at org.apache.spark.rdd.RDD.collect(RDD.scala:926)
    at org.apache.spark.api.python.PythonRDD$.collectAndServe(PythonRDD.scala:405)
    at org.apache.spark.api.python.PythonRDD.collectAndServe(PythonRDD.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:497)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:231)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:381)
    at py4j.Gateway.invoke(Gateway.java:259)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:133)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:209)
    at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/Applications/spark-1.6.1-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/worker.py", line 111, in main
    process()
  File "/Applications/spark-1.6.1-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/worker.py", line 106, in process
    serializer.dump_stream(func(split_index, iterator), outfile)
  File "/Applications/spark-1.6.1-bin-hadoop2.6/python/lib/pyspark.zip/pyspark/serializers.py", line 263, in dump_stream
    vs = list(itertools.islice(iterator, batch))
  File "/Users/Henry/miniconda/envs/thunder1_env/lib/python2.7/site-packages/bolt/spark/array.py", line 482, in <lambda>
    filtered = self._rdd.filter(lambda kv: key_check(kv[0]))
  File "/Users/Henry/miniconda/envs/thunder1_env/lib/python2.7/site-packages/bolt/spark/array.py", line 476, in key_check
    out = [check(k, s) for k, s in zip(key, key_slices)]
TypeError: zip argument #1 must support iteration

    at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRDD.scala:166)
    at org.apache.spark.api.python.PythonRunner$$anon$1.<init>(PythonRDD.scala:207)
    at org.apache.spark.api.python.PythonRunner.compute(PythonRDD.scala:125)
    at org.apache.spark.api.python.PythonRDD.compute(PythonRDD.scala:70)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:306)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:270)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
    at org.apache.spark.scheduler.Task.run(Task.scala:89)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:214)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    ... 1 more
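
Note on the root cause: the driver-side error is only a wrapper; the actual failure is the Python-worker TypeError near the top of the Java exception. In bolt's key_check (bolt/spark/array.py, line 476), each record key is zipped against the per-dimension key_slices, and zip() raises "zip argument #1 must support iteration" when the key is a bare scalar rather than a tuple. The sketch below is a minimal, hypothetical stand-in for that pattern (key_check and key_slices here are simplified illustrations, not the library's actual code), written in the same Python 2/3-compatible style:

    # Hypothetical sketch of the failure mode, not the bolt implementation.
    key_slices = [slice(0, 10)]          # one slice per key dimension

    def key_check(key):
        # Mirrors the "for k, s in zip(key, key_slices)" pattern:
        # each component of a tuple key is tested against its slice.
        return all(s.start <= k < s.stop for k, s in zip(key, key_slices))

    print(key_check((2,)))               # True: a tuple key iterates fine

    try:
        key_check(2)                     # a scalar key is not iterable
    except TypeError as err:
        print(err)                       # zip argument #1 must support iteration

So the records reaching the filter evidently carry scalar integer keys rather than tuples. The `series[2]` expression itself looks fine; the likelier place to investigate is how the underlying bolt array / thunder Series was constructed (or a thunder/bolt version mismatch), since that determines whether record keys are tuples.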