Py4JJavaError: An error occurred while calling o44.collectToPython.
: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange rangepartitioning(latitude#30 ASC NULLS FIRST, longitude#31 ASC NULLS FIRST, 200)
+- *HashAggregate(keys=[substring(field#40, 1, 6)#58, substring(field2#41, 1, 6)#59], functions=[], output=[latitude#30, longitude#31])
   +- Exchange hashpartitioning(substring(field#40, 1, 6)#58, substring(field2#41, 1, 6)#59, 200)
      +- *HashAggregate(keys=[substring(field#40, 1, 6) AS substring(field#40, 1, 6)#58, substring(field2#41, 1, 6) AS substring(field2#41, 1, 6)#59], functions=[], output=[substring(field#40, 1, 6)#58, substring(field2#41, 1, 6)#59])
         +- HiveTableScan [field#40, field2#41], HiveTableRelation `scheme`.`table_name`, org.apache.hadoop.hive.ql.io.orc.OrcSerde, [FIELDS LIST], [isnotnull(field3#52), isnotnull(field4#51), isnotnull(time_key#50), (time_key#50 >= 17328), (time_key#50 <= 17365), (cast(field3#52 as int) = 5201), (cast(field4#51 as int) = 52)]
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange.doExecute(ShuffleExchange.scala:115)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
    at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:252)
    at org.apache.spark.sql.execution.SortExec.inputRDDs(SortExec.scala:121)
    at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:386)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
    at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:228)
    at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:275)
    at org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply$mcI$sp(Dataset.scala:2804)
    at org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply(Dataset.scala:2801)
    at org.apache.spark.sql.Dataset$$anonfun$collectToPython$1.apply(Dataset.scala:2801)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
    at org.apache.spark.sql.Dataset.withNewExecutionId(Dataset.scala:2824)
    at org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:2801)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:280)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:214)
    at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange hashpartitioning(substring(field#40, 1, 6)#58, substring(field2#41, 1, 6)#59, 200)
+- *HashAggregate(keys=[substring(field#40, 1, 6) AS substring(field#40, 1, 6)#58, substring(field2#41, 1, 6) AS substring(field2#41, 1, 6)#59], functions=[], output=[substring(field#40, 1, 6)#58, substring(field2#41, 1, 6)#59])
   +- HiveTableScan [field#40, field2#41], HiveTableRelation `scheme`.`table_name`, org.apache.hadoop.hive.ql.io.orc.OrcSerde, [FIELDS LIST], [isnotnull(field3#52), isnotnull(field4#51), isnotnull(time_key#50), (time_key#50 >= 17328), (time_key#50 <= 17365), (cast(field3#52 as int) = 5201), (cast(field4#51 as int) = 52)]
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange.doExecute(ShuffleExchange.scala:115)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
    at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:252)
    at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:141)
    at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:386)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange.prepareShuffleDependency(ShuffleExchange.scala:88)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:124)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:115)
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
    ... 35 more
Caused by: org.apache.spark.sql.AnalysisException: java.lang.OutOfMemoryError: GC overhead limit exceeded;
    at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:106)
    at org.apache.spark.sql.hive.HiveExternalCatalog.listPartitionsByFilter(HiveExternalCatalog.scala:1107)
    at org.apache.spark.sql.hive.execution.HiveTableScanExec.rawPartitions$lzycompute(HiveTableScanExec.scala:170)
    at org.apache.spark.sql.hive.execution.HiveTableScanExec.rawPartitions(HiveTableScanExec.scala:164)
    at org.apache.spark.sql.hive.execution.HiveTableScanExec$$anonfun$11.apply(HiveTableScanExec.scala:192)
    at org.apache.spark.sql.hive.execution.HiveTableScanExec$$anonfun$11.apply(HiveTableScanExec.scala:192)
    at org.apache.spark.util.Utils$.withDummyCallSite(Utils.scala:2472)
    at org.apache.spark.sql.hive.execution.HiveTableScanExec.doExecute(HiveTableScanExec.scala:191)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
    at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:252)
    at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:141)
    at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:386)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange.prepareShuffleDependency(ShuffleExchange.scala:88)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:124)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:115)
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
    ... 55 more
Caused by: java.lang.OutOfMemoryError: GC overhead limit exceeded
    at org.apache.hadoop.hive.metastore.api.StorageDescriptor.<init>(StorageDescriptor.java:256)
    at org.apache.hadoop.hive.metastore.api.Partition.<init>(Partition.java:216)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.deepCopy(HiveMetaStoreClient.java:1563)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.deepCopyPartitions(HiveMetaStoreClient.java:1637)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.deepCopyPartitions(HiveMetaStoreClient.java:1625)
    at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.listPartitions(HiveMetaStoreClient.java:1050)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:156)
    at com.sun.proxy.$Proxy28.listPartitions(Unknown Source)
    at org.apache.hadoop.hive.ql.metadata.Hive.getAllPartitionsOf(Hive.java:2096)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.sql.hive.client.Shim_v0_13.getPartitionsByFilter(HiveShim.scala:628)
    at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$getPartitionsByFilter$1.apply(HiveClientImpl.scala:596)
    at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$getPartitionsByFilter$1.apply(HiveClientImpl.scala:594)
    at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:291)
    at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:232)
    at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:231)
    at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:274)
    at org.apache.spark.sql.hive.client.HiveClientImpl.getPartitionsByFilter(HiveClientImpl.scala:594)
    at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$listPartitionsByFilter$1.apply(HiveExternalCatalog.scala:1114)
    at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$listPartitionsByFilter$1.apply(HiveExternalCatalog.scala:1107)
    at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
    at org.apache.spark.sql.hive.HiveExternalCatalog.listPartitionsByFilter(HiveExternalCatalog.scala:1107)
    at org.apache.spark.sql.hive.execution.HiveTableScanExec.rawPartitions$lzycompute(HiveTableScanExec.scala:170)
    at org.apache.spark.sql.hive.execution.HiveTableScanExec.rawPartitions(HiveTableScanExec.scala:164)
    at org.apache.spark.sql.hive.execution.HiveTableScanExec$$anonfun$11.apply(HiveTableScanExec.scala:192)
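
For reference, a plan of this shape (a distinct over two substring(..., 1, 6) keys, then a range-partitioned sort, ending in collectToPython) is what PySpark would emit for something like the sketch below. This is a hypothetical reconstruction: the table and column names are the anonymized placeholders from the plan above, not the real ones.

    from pyspark.sql import SparkSession
    from pyspark.sql import functions as F

    spark = SparkSession.builder.enableHiveSupport().getOrCreate()

    # Predicates mirror the HiveTableScan filters in the plan above
    # (placeholder names, not the real columns).
    rows = (spark.table("scheme.table_name")
            .where((F.col("field3").cast("int") == 5201)
                   & (F.col("field4").cast("int") == 52)
                   & F.col("time_key").between(17328, 17365))
            .select(F.substring("field", 1, 6).alias("latitude"),
                    F.substring("field2", 1, 6).alias("longitude"))
            .distinct()                        # -> HashAggregate + Exchange hashpartitioning
            .orderBy("latitude", "longitude")  # -> Exchange rangepartitioning
            .collect())                        # -> o44.collectToPython on the JVM side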
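
The root cause is the innermost exception: the driver runs out of heap ("GC overhead limit exceeded") while HiveMetaStoreClient.listPartitions deep-copies metadata for the table's partitions. Note the frame Shim_v0_13.getPartitionsByFilter delegating to Hive.getAllPartitionsOf: the partition predicates were not pushed down to the metastore, so every partition of `scheme`.`table_name` was fetched and copied on the driver. A minimal sketch of one standard mitigation, assuming Spark 2.x on a partitioned Hive table:

    from pyspark.sql import SparkSession

    spark = (SparkSession.builder
             .enableHiveSupport()
             # Ask the metastore to prune partitions server-side instead of
             # shipping all partition metadata to the driver.
             .config("spark.sql.hive.metastorePartitionPruning", "true")
             .getOrCreate())

If all partitions really must be listed, the driver heap has to be large enough to hold them, e.g. pass --driver-memory 8g to spark-submit (the size is an arbitrary example; spark.driver.memory cannot be changed on an already-running JVM). Metastore-side pruning only helps when the filter is on partition columns the metastore can evaluate, which the time_key/field3/field4 predicates above appear to be.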