- ---------------------------------------------------------------------------
- Py4JJavaError Traceback (most recent call last)
- <command-2672791667097299> in <module>()
- ----> 1 dfLoadHitDataPostEvar.where(~F.col('EventDecimal').like('0')).head(10)
- /databricks/spark/python/pyspark/sql/dataframe.py in head(self, n)
- 1275 rs = self.head(1)
- 1276 return rs[0] if rs else None
- -> 1277 return self.take(n)
- 1278
- 1279 @ignore_unicode_prefix
- /databricks/spark/python/pyspark/sql/dataframe.py in take(self, num)
- 587 [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]
- 588 """
- --> 589 return self.limit(num).collect()
- 590
- 591 @since(1.3)
- /databricks/spark/python/pyspark/sql/dataframe.py in collect(self)
- 546 # Default path used in OSS Spark / for non-DF-ACL clusters:
- 547 with SCCallSiteSync(self._sc) as css:
- --> 548 sock_info = self._jdf.collectToPython()
- 549 return list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer())))
- 550
- /databricks/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
- 1255 answer = self.gateway_client.send_command(command)
- 1256 return_value = get_return_value(
- -> 1257 answer, self.gateway_client, self.target_id, self.name)
- 1258
- 1259 for temp_arg in temp_args:
- /databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
- 61 def deco(*a, **kw):
- 62 try:
- ---> 63 return f(*a, **kw)
- 64 except py4j.protocol.Py4JJavaError as e:
- 65 s = e.java_exception.toString()
- /databricks/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
- 326 raise Py4JJavaError(
- 327 "An error occurred while calling {0}{1}{2}.\n".
- --> 328 format(target_id, ".", name), value)
- 329 else:
- 330 raise Py4JError(
- Py4JJavaError: An error occurred while calling o1189.collectToPython.
- : org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
- SortAggregate(key=[post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230], functions=[finalmerge_max(merge max#7297) AS max(columnValue#7212)#7262, finalmerge_max(merge max#7299) AS max(ExistOnEventList#7231)#7264], output=[post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, columnValue#7263, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230, ExistOnEventList#7265])
- +- *(4) Sort [post_visid_high#1403 ASC NULLS FIRST, post_visid_low#1404 ASC NULLS FIRST, hitid_high#1198 ASC NULLS FIRST, hitid_low#1199 ASC NULLS FIRST, visit_num#1438 ASC NULLS FIRST, visit_page_num#1439 ASC NULLS FIRST, visit_start_time_gmt#1444 ASC NULLS FIRST, date_time#1182 ASC NULLS FIRST, EventNumber#2355 ASC NULLS FIRST, columnName#7128 ASC NULLS FIRST, ExistOnHit#7130 ASC NULLS FIRST, BizEvarPropEventNumberKey#2354 ASC NULLS FIRST, EventDecimal#7230 ASC NULLS FIRST], false, 0
- +- Exchange hashpartitioning(post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230, 200)
- +- SortAggregate(key=[post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230], functions=[partial_max(columnValue#7212) AS max#7297, partial_max(ExistOnEventList#7231) AS max#7299], output=[post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230, max#7297, max#7299])
- +- *(3) Sort [post_visid_high#1403 ASC NULLS FIRST, post_visid_low#1404 ASC NULLS FIRST, hitid_high#1198 ASC NULLS FIRST, hitid_low#1199 ASC NULLS FIRST, visit_num#1438 ASC NULLS FIRST, visit_page_num#1439 ASC NULLS FIRST, visit_start_time_gmt#1444 ASC NULLS FIRST, date_time#1182 ASC NULLS FIRST, EventNumber#2355 ASC NULLS FIRST, columnName#7128 ASC NULLS FIRST, ExistOnHit#7130 ASC NULLS FIRST, BizEvarPropEventNumberKey#2354 ASC NULLS FIRST, EventDecimal#7230 ASC NULLS FIRST], false, 0
- +- *(3) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, columnValue#7212, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, coalesce(cast(EventDecimal#7131 as int), 0) AS EventDecimal#7230, coalesce(ExistOnEventList#7132, 0) AS ExistOnEventList#7231]
- +- *(3) HashAggregate(keys=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404], functions=[], output=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404])
- +- Exchange hashpartitioning(ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404, 200)
- +- *(2) HashAggregate(keys=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404], functions=[], output=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404])
- +- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2340, EventDecimal#2341, ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, columnName#7128, columnValue#8155, ExistOnHit#8156, EventNumber#2355, CASE WHEN isnotnull(EventDecimal#2341) THEN EventDecimal#2341 ELSE 0 END AS EventDecimal#7131]
- +- *(2) BroadcastHashJoin [cast(EventNumber#2340 as int)], [EventNumber#2355], Inner, BuildRight, false
- :- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, substring_index(EventNumber#2328, =, 1) AS EventNumber#2340, cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0)) AS EventDecimal#2341]
- : +- *(2) Filter (NOT (cast(coalesce(cast(CASE WHEN isnotnull(cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0))) THEN cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0)) ELSE 0 END as int), 0) as string) = 0) && isnotnull(substring_index(EventNumber#2328, =, 1)))
- : +- *(2) Generate explode(split(post_event_list#1298, ,)), [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182], false, [EventNumber#2328]
- : +- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, post_event_list#1298]
- : +- *(2) FileScan csv [date_time#1182,hitid_high#1198,hitid_low#1199,post_event_list#1298,post_visid_high#1403,post_visid_low#1404,visit_num#1438,visit_page_num#1439,visit_start_time_gmt#1444] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[dbfs:/mnt/dev-raw/sitecat/hit_data/2019/01/21], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<date_time:string,hitid_high:string,hitid_low:string,post_event_list:string,post_visid_high...
- +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[5, int, false] as bigint)))
- +- *(1) HashAggregate(keys=[1 AS ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, EventEvarPropName#2356 AS columnName#7128, null AS columnValue#8155, 1 AS ExistOnHit#8156, EventNumber#2355], functions=[], output=[ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, columnName#7128, columnValue#8155, ExistOnHit#8156, EventNumber#2355])
- +- *(1) Project [BizEvarPropEventNumberKey#2354 AS BizEvarPropEventNumberKey#2354, EventNumber#2355 AS EventNumber#2355, EventEvarPropName#2356 AS EventEvarPropName#2356]
- +- *(1) Scan SqlDWRelation((SELECT * FROM (SELECT [...] pEventNumberKey" > 1))) q} ) [BizEvarPropEventNumberKey#2354,EventNumber#2355,EventEvarPropName#2356] PushedFilters: [], ReadSchema: struct<BizEvarPropEventNumberKey:int,EventNumber:int,EventEvarPropName:string>
- at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
- at org.apache.spark.sql.execution.aggregate.SortAggregateExec.doExecute(SortAggregateExec.scala:93)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:143)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
- at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:131)
- at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:66)
- at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:75)
- at org.apache.spark.sql.execution.ResultCacheManager.getOrComputeResult(ResultCacheManager.scala:497)
- at org.apache.spark.sql.execution.CollectLimitExec.executeCollectResult(limit.scala:48)
- at org.apache.spark.sql.Dataset$$anonfun$51.apply(Dataset.scala:3289)
- at org.apache.spark.sql.Dataset$$anonfun$51.apply(Dataset.scala:3288)
- at org.apache.spark.sql.Dataset$$anonfun$55.apply(Dataset.scala:3423)
- at org.apache.spark.sql.execution.SQLExecution$$anonfun$withCustomExecutionEnv$1.apply(SQLExecution.scala:99)
- at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:228)
- at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:85)
- at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:158)
- at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withAction(Dataset.scala:3422)
- at org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:3288)
- at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
- at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
- at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
- at java.lang.reflect.Method.invoke(Method.java:498)
- at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
- at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
- at py4j.Gateway.invoke(Gateway.java:295)
- at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
- at py4j.commands.CallCommand.execute(CallCommand.java:79)
- at py4j.GatewayConnection.run(GatewayConnection.java:251)
- at java.lang.Thread.run(Thread.java:748)
- Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
- Exchange hashpartitioning(post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230, 200)
- +- SortAggregate(key=[post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230], functions=[partial_max(columnValue#7212) AS max#7297, partial_max(ExistOnEventList#7231) AS max#7299], output=[post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230, max#7297, max#7299])
- +- *(3) Sort [post_visid_high#1403 ASC NULLS FIRST, post_visid_low#1404 ASC NULLS FIRST, hitid_high#1198 ASC NULLS FIRST, hitid_low#1199 ASC NULLS FIRST, visit_num#1438 ASC NULLS FIRST, visit_page_num#1439 ASC NULLS FIRST, visit_start_time_gmt#1444 ASC NULLS FIRST, date_time#1182 ASC NULLS FIRST, EventNumber#2355 ASC NULLS FIRST, columnName#7128 ASC NULLS FIRST, ExistOnHit#7130 ASC NULLS FIRST, BizEvarPropEventNumberKey#2354 ASC NULLS FIRST, EventDecimal#7230 ASC NULLS FIRST], false, 0
- +- *(3) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, columnValue#7212, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, coalesce(cast(EventDecimal#7131 as int), 0) AS EventDecimal#7230, coalesce(ExistOnEventList#7132, 0) AS ExistOnEventList#7231]
- +- *(3) HashAggregate(keys=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404], functions=[], output=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404])
- +- Exchange hashpartitioning(ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404, 200)
- +- *(2) HashAggregate(keys=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404], functions=[], output=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404])
- +- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2340, EventDecimal#2341, ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, columnName#7128, columnValue#8155, ExistOnHit#8156, EventNumber#2355, CASE WHEN isnotnull(EventDecimal#2341) THEN EventDecimal#2341 ELSE 0 END AS EventDecimal#7131]
- +- *(2) BroadcastHashJoin [cast(EventNumber#2340 as int)], [EventNumber#2355], Inner, BuildRight, false
- :- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, substring_index(EventNumber#2328, =, 1) AS EventNumber#2340, cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0)) AS EventDecimal#2341]
- : +- *(2) Filter (NOT (cast(coalesce(cast(CASE WHEN isnotnull(cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0))) THEN cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0)) ELSE 0 END as int), 0) as string) = 0) && isnotnull(substring_index(EventNumber#2328, =, 1)))
- : +- *(2) Generate explode(split(post_event_list#1298, ,)), [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182], false, [EventNumber#2328]
- : +- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, post_event_list#1298]
- : +- *(2) FileScan csv [date_time#1182,hitid_high#1198,hitid_low#1199,post_event_list#1298,post_visid_high#1403,post_visid_low#1404,visit_num#1438,visit_page_num#1439,visit_start_time_gmt#1444] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[dbfs:/mnt/dev-raw/sitecat/hit_data/2019/01/21], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<date_time:string,hitid_high:string,hitid_low:string,post_event_list:string,post_visid_high...
- +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[5, int, false] as bigint)))
- +- *(1) HashAggregate(keys=[1 AS ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, EventEvarPropName#2356 AS columnName#7128, null AS columnValue#8155, 1 AS ExistOnHit#8156, EventNumber#2355], functions=[], output=[ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, columnName#7128, columnValue#8155, ExistOnHit#8156, EventNumber#2355])
- +- *(1) Project [BizEvarPropEventNumberKey#2354 AS BizEvarPropEventNumberKey#2354, EventNumber#2355 AS EventNumber#2355, EventEvarPropName#2356 AS EventEvarPropName#2356]
- +- *(1) Scan SqlDWRelation((SELECT * FROM (SELECT [...] pEventNumberKey" > 1))) q} ) [BizEvarPropEventNumberKey#2354,EventNumber#2355,EventEvarPropName#2356] PushedFilters: [], ReadSchema: struct<BizEvarPropEventNumberKey:int,EventNumber:int,EventEvarPropName:string>
- at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
- at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:119)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:143)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
- at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:131)
- at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:389)
- at org.apache.spark.sql.execution.SortExec.inputRDDs(SortExec.scala:121)
- at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:625)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:143)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
- at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:131)
- at org.apache.spark.sql.execution.aggregate.SortAggregateExec$$anonfun$doExecute$1.apply(SortAggregateExec.scala:95)
- at org.apache.spark.sql.execution.aggregate.SortAggregateExec$$anonfun$doExecute$1.apply(SortAggregateExec.scala:93)
- at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
- ... 31 more
- Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
- SortAggregate(key=[post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230], functions=[partial_max(columnValue#7212) AS max#7297, partial_max(ExistOnEventList#7231) AS max#7299], output=[post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230, max#7297, max#7299])
- *** WARNING: skipped 896 bytes of output ***
- +- *(3) HashAggregate(keys=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404], functions=[], output=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404])
- +- Exchange hashpartitioning(ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404, 200)
- +- *(2) HashAggregate(keys=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404], functions=[], output=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404])
- +- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2340, EventDecimal#2341, ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, columnName#7128, columnValue#8155, ExistOnHit#8156, EventNumber#2355, CASE WHEN isnotnull(EventDecimal#2341) THEN EventDecimal#2341 ELSE 0 END AS EventDecimal#7131]
- +- *(2) BroadcastHashJoin [cast(EventNumber#2340 as int)], [EventNumber#2355], Inner, BuildRight, false
- :- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, substring_index(EventNumber#2328, =, 1) AS EventNumber#2340, cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0)) AS EventDecimal#2341]
- : +- *(2) Filter (NOT (cast(coalesce(cast(CASE WHEN isnotnull(cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0))) THEN cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0)) ELSE 0 END as int), 0) as string) = 0) && isnotnull(substring_index(EventNumber#2328, =, 1)))
- : +- *(2) Generate explode(split(post_event_list#1298, ,)), [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182], false, [EventNumber#2328]
- : +- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, post_event_list#1298]
- : +- *(2) FileScan csv [date_time#1182,hitid_high#1198,hitid_low#1199,post_event_list#1298,post_visid_high#1403,post_visid_low#1404,visit_num#1438,visit_page_num#1439,visit_start_time_gmt#1444] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[dbfs:/mnt/dev-raw/sitecat/hit_data/2019/01/21], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<date_time:string,hitid_high:string,hitid_low:string,post_event_list:string,post_visid_high...
- +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[5, int, false] as bigint)))
- +- *(1) HashAggregate(keys=[1 AS ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, EventEvarPropName#2356 AS columnName#7128, null AS columnValue#8155, 1 AS ExistOnHit#8156, EventNumber#2355], functions=[], output=[ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, columnName#7128, columnValue#8155, ExistOnHit#8156, EventNumber#2355])
- +- *(1) Project [BizEvarPropEventNumberKey#2354 AS BizEvarPropEventNumberKey#2354, EventNumber#2355 AS EventNumber#2355, EventEvarPropName#2356 AS EventEvarPropName#2356]
- +- *(1) Scan SqlDWRelation((SELECT * FROM (SELECT [...] pEventNumberKey" > 1))) q} ) [BizEvarPropEventNumberKey#2354,EventNumber#2355,EventEvarPropName#2356] PushedFilters: [], ReadSchema: struct<BizEvarPropEventNumberKey:int,EventNumber:int,EventEvarPropName:string>
- at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
- at org.apache.spark.sql.execution.aggregate.SortAggregateExec.doExecute(SortAggregateExec.scala:93)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:143)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
- at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:131)
- at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.prepareShuffleDependency(ShuffleExchangeExec.scala:92)
- at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$$anonfun$doExecute$1.apply(ShuffleExchangeExec.scala:128)
- at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$$anonfun$doExecute$1.apply(ShuffleExchangeExec.scala:119)
- at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
- ... 50 more
- Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
- Exchange hashpartitioning(ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404, 200)
- +- *(2) HashAggregate(keys=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404], functions=[], output=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404])
- +- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2340, EventDecimal#2341, ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, columnName#7128, columnValue#8155, ExistOnHit#8156, EventNumber#2355, CASE WHEN isnotnull(EventDecimal#2341) THEN EventDecimal#2341 ELSE 0 END AS EventDecimal#7131]
- +- *(2) BroadcastHashJoin [cast(EventNumber#2340 as int)], [EventNumber#2355], Inner, BuildRight, false
- :- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, substring_index(EventNumber#2328, =, 1) AS EventNumber#2340, cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0)) AS EventDecimal#2341]
- : +- *(2) Filter (NOT (cast(coalesce(cast(CASE WHEN isnotnull(cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0))) THEN cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0)) ELSE 0 END as int), 0) as string) = 0) && isnotnull(substring_index(EventNumber#2328, =, 1)))
- : +- *(2) Generate explode(split(post_event_list#1298, ,)), [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182], false, [EventNumber#2328]
- : +- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, post_event_list#1298]
- : +- *(2) FileScan csv [date_time#1182,hitid_high#1198,hitid_low#1199,post_event_list#1298,post_visid_high#1403,post_visid_low#1404,visit_num#1438,visit_page_num#1439,visit_start_time_gmt#1444] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[dbfs:/mnt/dev-raw/sitecat/hit_data/2019/01/21], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<date_time:string,hitid_high:string,hitid_low:string,post_event_list:string,post_visid_high...
- +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[5, int, false] as bigint)))
- +- *(1) HashAggregate(keys=[1 AS ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, EventEvarPropName#2356 AS columnName#7128, null AS columnValue#8155, 1 AS ExistOnHit#8156, EventNumber#2355], functions=[], output=[ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, columnName#7128, columnValue#8155, ExistOnHit#8156, EventNumber#2355])
- +- *(1) Project [BizEvarPropEventNumberKey#2354 AS BizEvarPropEventNumberKey#2354, EventNumber#2355 AS EventNumber#2355, EventEvarPropName#2356 AS EventEvarPropName#2356]
- +- *(1) Scan SqlDWRelation((SELECT * FROM (SELECT [...] pEventNumberKey" > 1))) q} ) [BizEvarPropEventNumberKey#2354,EventNumber#2355,EventEvarPropName#2356] PushedFilters: [], ReadSchema: struct<BizEvarPropEventNumberKey:int,EventNumber:int,EventEvarPropName:string>
- at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
- at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:119)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:143)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
- at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:131)
- at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:389)
- at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:198)
- at org.apache.spark.sql.execution.ProjectExec.inputRDDs(basicPhysicalOperators.scala:45)
- at org.apache.spark.sql.execution.SortExec.inputRDDs(SortExec.scala:121)
- at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:625)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:143)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
- at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:131)
- at org.apache.spark.sql.execution.aggregate.SortAggregateExec$$anonfun$doExecute$1.apply(SortAggregateExec.scala:95)
- at org.apache.spark.sql.execution.aggregate.SortAggregateExec$$anonfun$doExecute$1.apply(SortAggregateExec.scala:93)
- at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
- ... 61 more
- Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: Binding attribute, tree: ExistOnEventList#7132
- at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
- at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:79)
- at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:78)
- at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:277)
- at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:277)
- at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:77)
- at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:276)
- at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:266)
- at org.apache.spark.sql.catalyst.expressions.BindReferences$.bindReference(BoundAttribute.scala:78)
- at org.apache.spark.sql.execution.aggregate.HashAggregateExec$$anonfun$55.apply(HashAggregateExec.scala:1031)
- at org.apache.spark.sql.execution.aggregate.HashAggregateExec$$anonfun$55.apply(HashAggregateExec.scala:1031)
- at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
- at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
- at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
- at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
- at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
- at scala.collection.AbstractTraversable.map(Traversable.scala:104)
- at org.apache.spark.sql.execution.aggregate.HashAggregateExec.doConsumeWithKeys(HashAggregateExec.scala:1031)
- at org.apache.spark.sql.execution.aggregate.HashAggregateExec.doConsume(HashAggregateExec.scala:224)
- at org.apache.spark.sql.execution.CodegenSupport$class.constructDoConsumeFunction(WholeStageCodegenExec.scala:214)
- at org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:185)
- at org.apache.spark.sql.execution.ProjectExec.consume(basicPhysicalOperators.scala:39)
- at org.apache.spark.sql.execution.ProjectExec.doConsume(basicPhysicalOperators.scala:75)
- at org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:187)
- at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.consume(BroadcastHashJoinExec.scala:45)
- at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.codegenInner(BroadcastHashJoinExec.scala:262)
- at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.doConsume(BroadcastHashJoinExec.scala:132)
- at org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:187)
- at org.apache.spark.sql.execution.ProjectExec.consume(basicPhysicalOperators.scala:39)
- at org.apache.spark.sql.execution.ProjectExec.doConsume(basicPhysicalOperators.scala:75)
- at org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:187)
- at org.apache.spark.sql.execution.FilterExec.consume(basicPhysicalOperators.scala:107)
- at org.apache.spark.sql.execution.FilterExec.doConsume(basicPhysicalOperators.scala:246)
- at org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:187)
- at org.apache.spark.sql.execution.GenerateExec.consume(GenerateExec.scala:58)
- at org.apache.spark.sql.execution.GenerateExec.codeGenCollection(GenerateExec.scala:242)
- at org.apache.spark.sql.execution.GenerateExec.doConsume(GenerateExec.scala:154)
- at org.apache.spark.sql.execution.CodegenSupport$class.constructDoConsumeFunction(WholeStageCodegenExec.scala:214)
- at org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:185)
- at org.apache.spark.sql.execution.ProjectExec.consume(basicPhysicalOperators.scala:39)
- at org.apache.spark.sql.execution.ProjectExec.doConsume(basicPhysicalOperators.scala:75)
- at org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:187)
- at org.apache.spark.sql.execution.FileSourceScanExec.consume(DataSourceScanExec.scala:174)
- at org.apache.spark.sql.execution.FileSourceScanExec.doProduce(DataSourceScanExec.scala:816)
- at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:90)
- at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
- at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:85)
- at org.apache.spark.sql.execution.FileSourceScanExec.produce(DataSourceScanExec.scala:174)
- at org.apache.spark.sql.execution.ProjectExec.doProduce(basicPhysicalOperators.scala:49)
- at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:90)
- at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
- at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:85)
- at org.apache.spark.sql.execution.ProjectExec.produce(basicPhysicalOperators.scala:39)
- at org.apache.spark.sql.execution.GenerateExec.doProduce(GenerateExec.scala:141)
- at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:90)
- at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
- at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:85)
- at org.apache.spark.sql.execution.GenerateExec.produce(GenerateExec.scala:58)
- at org.apache.spark.sql.execution.FilterExec.doProduce(basicPhysicalOperators.scala:147)
- at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:90)
- at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
- at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:85)
- at org.apache.spark.sql.execution.FilterExec.produce(basicPhysicalOperators.scala:107)
- at org.apache.spark.sql.execution.ProjectExec.doProduce(basicPhysicalOperators.scala:49)
- at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:90)
- at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
- at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:85)
- at org.apache.spark.sql.execution.ProjectExec.produce(basicPhysicalOperators.scala:39)
- at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.doProduce(BroadcastHashJoinExec.scala:127)
- at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:90)
- at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
- at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:85)
- at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.produce(BroadcastHashJoinExec.scala:45)
- at org.apache.spark.sql.execution.ProjectExec.doProduce(basicPhysicalOperators.scala:49)
- at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:90)
- at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
- at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:85)
- at org.apache.spark.sql.execution.ProjectExec.produce(basicPhysicalOperators.scala:39)
- at org.apache.spark.sql.execution.aggregate.HashAggregateExec.doProduceWithKeys(HashAggregateExec.scala:919)
- at org.apache.spark.sql.execution.aggregate.HashAggregateExec.doProduce(HashAggregateExec.scala:216)
- at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:90)
- at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
- at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:85)
- at org.apache.spark.sql.execution.aggregate.HashAggregateExec.produce(HashAggregateExec.scala:43)
- at org.apache.spark.sql.execution.WholeStageCodegenExec.doCodeGen(WholeStageCodegenExec.scala:542)
- at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:596)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:143)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
- at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:131)
- at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.prepareShuffleDependency(ShuffleExchangeExec.scala:92)
- at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$$anonfun$doExecute$1.apply(ShuffleExchangeExec.scala:128)
- at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$$anonfun$doExecute$1.apply(ShuffleExchangeExec.scala:119)
- at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
- ... 82 more
- Caused by: java.lang.RuntimeException: Couldn't find ExistOnEventList#7132 in [post_visid_high#1403,post_visid_low#1404,hitid_high#1198,hitid_low#1199,visit_num#1438,visit_page_num#1439,visit_start_time_gmt#1444,date_time#1182,EventNumber#2340,EventDecimal#2341,ExistOnEventList#8154,BizEvarPropEventNumberKey#2354,columnName#7128,columnValue#8155,ExistOnHit#8156,EventNumber#2355,EventDecimal#7131]
- at scala.sys.package$.error(package.scala:27)
- at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1$$anonfun$applyOrElse$1.apply(BoundAttribute.scala:85)
- at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1$$anonfun$applyOrElse$1.apply(BoundAttribute.scala:79)
- at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
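
The innermost cause (the final "Caused by" above) is the real problem: during whole-stage code generation for the HashAggregate, Catalyst tries to bind ExistOnEventList#7132, but the child operator's output only contains ExistOnEventList#8154, so the attribute IDs in the optimized plan are out of sync and binding fails. Below is a minimal, hedged sketch of two workarounds commonly suggested for this class of TreeNodeException; dfLoadHitDataPostEvar is the DataFrame from the failing cell, dfStable is a hypothetical name introduced here, and neither workaround has been verified against this exact plan.

from pyspark.sql import functions as F

# Workaround 1 (assumption: whole-stage codegen is the trigger):
# fall back to Spark's interpreted execution path for this session.
spark.conf.set("spark.sql.codegen.wholeStage", "false")

# Workaround 2: materialize the intermediate DataFrame so its derived
# columns are re-planned with consistent attribute IDs downstream.
dfStable = dfLoadHitDataPostEvar.select("*").cache()
dfStable.count()  # forces evaluation, populating the cache

# Re-run the failing expression against the stabilized DataFrame.
# Note: like('0') contains no wildcards, so it is an exact string
# match on "0"; rows where EventDecimal is NULL are also dropped,
# because ~NULL is NULL and where() filters NULL out.
dfStable.where(~F.col("EventDecimal").like("0")).head(10)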