Advertisement
Guest User

Untitled

a guest
Jun 20th, 2019
135
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.47 KB | None | 0 0
  1. pdf = df.toPandas()
  2.  
  3. ---------------------------------------------------------------------------
  4. Py4JJavaError Traceback (most recent call last)
  5. <timed exec> in <module>
  6.  
  7. /usr/local/spark/python/pyspark/sql/dataframe.py in toPandas(self)
  8. 2140
  9. 2141 # Below is toPandas without Arrow optimization.
  10. -> 2142 pdf = pd.DataFrame.from_records(self.collect(), columns=self.columns)
  11. 2143
  12. 2144 dtype = {}
  13.  
  14. /usr/local/spark/python/pyspark/sql/dataframe.py in collect(self)
  15. 531 """
  16. 532 with SCCallSiteSync(self._sc) as css:
  17. --> 533 sock_info = self._jdf.collectToPython()
  18. 534 return list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer())))
  19. 535
  20.  
  21. /usr/local/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
  22. 1255 answer = self.gateway_client.send_command(command)
  23. 1256 return_value = get_return_value(
  24. -> 1257 answer, self.gateway_client, self.target_id, self.name)
  25. 1258
  26. 1259 for temp_arg in temp_args:
  27.  
  28. /usr/local/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
  29. 61 def deco(*a, **kw):
  30. 62 try:
  31. ---> 63 return f(*a, **kw)
  32. 64 except py4j.protocol.Py4JJavaError as e:
  33. 65 s = e.java_exception.toString()
  34.  
  35. /usr/local/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
  36. 326 raise Py4JJavaError(
  37. 327 "An error occurred while calling {0}{1}{2}.\n".
  38. --> 328 format(target_id, ".", name), value)
  39. 329 else:
  40. 330 raise Py4JError(
  41.  
  42. Py4JJavaError: An error occurred while calling o99.collectToPython.
  43. : org.apache.spark.SparkException: Job aborted due to stage failure: Task 64 in stage 6.0 failed 1 times, most recent failure: Lost task 64.0 in stage 6.0 (TID 780, localhost, executor driver): java.lang.OutOfMemoryError: Java heap space
  44.  
  45. Driver stacktrace:
  46. at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:1889)
  47. at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:1877)
  48. at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:1876)
  49. at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
  50. at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
  51. at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
  52. at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1876)
  53. at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:926)
  54. at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:926)
  55. at scala.Option.foreach(Option.scala:274)
  56. at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926)
  57. at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2110)
  58. at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2059)
  59. at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2048)
  60. at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
  61. at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)
  62. at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
  63. at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
  64. at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
  65. at org.apache.spark.SparkContext.runJob(SparkContext.scala:2126)
  66. at org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:945)
  67. at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
  68. at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
  69. at org.apache.spark.rdd.RDD.withScope(RDD.scala:363)
  70. at org.apache.spark.rdd.RDD.collect(RDD.scala:944)
  71. at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:299)
  72. at org.apache.spark.sql.Dataset.$anonfun$collectToPython$1(Dataset.scala:3257)
  73. at org.apache.spark.sql.Dataset.$anonfun$withAction$2(Dataset.scala:3364)
  74. at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:78)
  75. at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
  76. at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
  77. at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3364)
  78. at org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:3254)
  79. at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  80. at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
  81. at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  82. at java.lang.reflect.Method.invoke(Method.java:498)
  83. at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
  84. at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
  85. at py4j.Gateway.invoke(Gateway.java:282)
  86. at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
  87. at py4j.commands.CallCommand.execute(CallCommand.java:79)
  88. at py4j.GatewayConnection.run(GatewayConnection.java:238)
  89. at java.lang.Thread.run(Thread.java:748)
  90. Caused by: java.lang.OutOfMemoryError: Java heap space
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement