Advertisement
AbushkevichGrisha

Pyspark lag

Sep 13th, 2022
1,103
0
Never
1
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 10.65 KB | None | 0 0
  1. ---------------------------------------------------------------------------
  2. Py4JJavaError                             Traceback (most recent call last)
  3. <ipython-input-36-a53f8bf4cb69> in <module>
  4. ----> 1 df_new.show(5)
  5.  
  6. /opt/cloudera/parcels/CDH-6.3.2-1.cdh6.3.2.p0.1605554/lib/spark/python/pyspark/sql/dataframe.py in show(self, n, truncate, vertical)
  7.     376         """
  8.    377         if isinstance(truncate, bool) and truncate:
  9. --> 378             print(self._jdf.showString(n, 20, vertical))
  10.    379         else:
  11.    380             print(self._jdf.showString(n, int(truncate), vertical))
  12.  
  13. /opt/cloudera/parcels/CDH-6.3.2-1.cdh6.3.2.p0.1605554/lib/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
  14.   1255         answer = self.gateway_client.send_command(command)
  15.   1256         return_value = get_return_value(
  16. -> 1257             answer, self.gateway_client, self.target_id, self.name)
  17.   1258
  18.   1259         for temp_arg in temp_args:
  19.  
  20. /opt/cloudera/parcels/CDH-6.3.2-1.cdh6.3.2.p0.1605554/lib/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
  21.     61     def deco(*a, **kw):
  22.     62         try:
  23. ---> 63             return f(*a, **kw)
  24.     64         except py4j.protocol.Py4JJavaError as e:
  25.     65             s = e.java_exception.toString()
  26.  
  27. /opt/cloudera/parcels/CDH-6.3.2-1.cdh6.3.2.p0.1605554/lib/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
  28.    326                 raise Py4JJavaError(
  29.    327                     "An error occurred while calling {0}{1}{2}.\n".
  30. --> 328                     format(target_id, ".", name), value)
  31.    329             else:
  32.    330                 raise Py4JError(
  33.  
  34. Py4JJavaError: An error occurred while calling o1397.showString.
  35. : org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 22.0 failed 4 times, most recent failure: Lost task 1.3 in stage 22.0 (TID 89, sber-node03.atp-fivt.org, executor 30): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  36.  File "/opt/cloudera/parcels/CDH-6.3.2-1.cdh6.3.2.p0.1605554/lib/spark/python/pyspark/worker.py", line 372, in main
  37.    process()
  38.  File "/opt/cloudera/parcels/CDH-6.3.2-1.cdh6.3.2.p0.1605554/lib/spark/python/pyspark/worker.py", line 367, in process
  39.    serializer.dump_stream(func(split_index, iterator), outfile)
  40.  File "/opt/cloudera/parcels/CDH-6.3.2-1.cdh6.3.2.p0.1605554/lib/spark/python/pyspark/serializers.py", line 390, in dump_stream
  41.    vs = list(itertools.islice(iterator, batch))
  42.  File "/opt/cloudera/parcels/CDH-6.3.2-1.cdh6.3.2.p0.1605554/lib/spark/python/pyspark/util.py", line 99, in wrapper
  43.    return f(*args, **kwargs)
  44.  File "<ipython-input-3-929a02861814>", line 3, in pars_wiki
  45. ValueError: too many values to unpack (expected 2)
  46.  
  47.     at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:456)
  48.     at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:592)
  49.     at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:575)
  50.     at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:410)
  51.     at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
  52.     at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
  53.     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  54.     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  55.     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  56.     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
  57.     at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  58.     at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$11$$anon$1.hasNext(WholeStageCodegenExec.scala:624)
  59.     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  60.     at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
  61.     at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
  62.     at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
  63.     at org.apache.spark.scheduler.Task.run(Task.scala:121)
  64.     at org.apache.spark.executor.Executor$TaskRunner$$anonfun$11.apply(Executor.scala:407)
  65.     at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1408)
  66.     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:413)
  67.     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  68.     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  69.     at java.lang.Thread.run(Thread.java:748)
  70.  
  71. Driver stacktrace:
  72.     at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1890)
  73.     at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1878)
  74.     at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1877)
  75.     at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
  76.     at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
  77.     at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1877)
  78.     at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:929)
  79.     at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:929)
  80.     at scala.Option.foreach(Option.scala:257)
  81.     at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:929)
  82.     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2111)
  83.     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2060)
  84.     at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2049)
  85.     at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
  86.     at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:740)
  87.     at org.apache.spark.SparkContext.runJob(SparkContext.scala:2081)
  88.     at org.apache.spark.SparkContext.runJob(SparkContext.scala:2102)
  89.     at org.apache.spark.SparkContext.runJob(SparkContext.scala:2121)
  90.     at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:365)
  91.     at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
  92.     at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3383)
  93.     at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2544)
  94.     at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2544)
  95.     at org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3364)
  96.     at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
  97.     at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
  98.     at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
  99.     at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3363)
  100.     at org.apache.spark.sql.Dataset.head(Dataset.scala:2544)
  101.     at org.apache.spark.sql.Dataset.take(Dataset.scala:2758)
  102.     at org.apache.spark.sql.Dataset.getRows(Dataset.scala:254)
  103.     at org.apache.spark.sql.Dataset.showString(Dataset.scala:291)
  104.     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
  105.     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
  106.     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
  107.     at java.lang.reflect.Method.invoke(Method.java:498)
  108.     at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
  109.     at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
  110.     at py4j.Gateway.invoke(Gateway.java:282)
  111.     at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
  112.     at py4j.commands.CallCommand.execute(CallCommand.java:79)
  113.     at py4j.GatewayConnection.run(GatewayConnection.java:238)
  114.     at java.lang.Thread.run(Thread.java:748)
  115. Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  116.  File "/opt/cloudera/parcels/CDH-6.3.2-1.cdh6.3.2.p0.1605554/lib/spark/python/pyspark/worker.py", line 372, in main
  117.    process()
  118.  File "/opt/cloudera/parcels/CDH-6.3.2-1.cdh6.3.2.p0.1605554/lib/spark/python/pyspark/worker.py", line 367, in process
  119.    serializer.dump_stream(func(split_index, iterator), outfile)
  120.  File "/opt/cloudera/parcels/CDH-6.3.2-1.cdh6.3.2.p0.1605554/lib/spark/python/pyspark/serializers.py", line 390, in dump_stream
  121.    vs = list(itertools.islice(iterator, batch))
  122.  File "/opt/cloudera/parcels/CDH-6.3.2-1.cdh6.3.2.p0.1605554/lib/spark/python/pyspark/util.py", line 99, in wrapper
  123.    return f(*args, **kwargs)
  124.  File "<ipython-input-3-929a02861814>", line 3, in pars_wiki
  125. ValueError: too many values to unpack (expected 2)
  126.  
  127.     at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:456)
  128.     at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:592)
  129.     at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:575)
  130.     at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:410)
  131.     at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
  132.     at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
  133.     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  134.     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  135.     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  136.     at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown Source)
  137.     at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
  138.     at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$11$$anon$1.hasNext(WholeStageCodegenExec.scala:624)
  139.     at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
  140.     at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:125)
  141.     at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
  142.     at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:55)
  143.     at org.apache.spark.scheduler.Task.run(Task.scala:121)
  144.     at org.apache.spark.executor.Executor$TaskRunner$$anonfun$11.apply(Executor.scala:407)
  145.     at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1408)
  146.     at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:413)
  147.     at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  148.     at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  149.     ... 1 more
  150.
  151.  
Tags: pyspark
Advertisement
Comments
  • dynomo1124
    1 year (edited)
    # text 0.51 KB | 0 0
    1. A https://techrom.net/ is one of the best app stores where you'll get all the things for the best mod apps. Welcome to <a href="http://keralauniversity.org/">Kerala University</a>. Here, KUORG, we provide the latest updates on various topics across the globe. We are one of the top and also the fastest-growing news sites around. <a href="https://tutuappapkfree.com/youtube-plus/">YouTube Plus</a> Free Apk – it can be said that Spotify Music is the "king" in the field of providing music online, as this application claims
    2.  
Add Comment
Please, Sign In to add comment
Advertisement