---------------------------------------------------------------------------
Py4JJavaError Traceback (most recent call last)
<command-2672791667097299> in <module>()
----> 1 dfLoadHitDataPostEvar.where(~F.col('EventDecimal').like('0')).head(10)

/databricks/spark/python/pyspark/sql/dataframe.py in head(self, n)
1275 rs = self.head(1)
1276 return rs[0] if rs else None
-> 1277 return self.take(n)
1278
1279 @ignore_unicode_prefix

/databricks/spark/python/pyspark/sql/dataframe.py in take(self, num)
587 [Row(age=2, name=u'Alice'), Row(age=5, name=u'Bob')]
588 """
--> 589 return self.limit(num).collect()
590
591 @since(1.3)

/databricks/spark/python/pyspark/sql/dataframe.py in collect(self)
546 # Default path used in OSS Spark / for non-DF-ACL clusters:
547 with SCCallSiteSync(self._sc) as css:
--> 548 sock_info = self._jdf.collectToPython()
549 return list(_load_from_socket(sock_info, BatchedSerializer(PickleSerializer())))
550

/databricks/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:

/databricks/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
61 def deco(*a, **kw):
62 try:
---> 63 return f(*a, **kw)
64 except py4j.protocol.Py4JJavaError as e:
65 s = e.java_exception.toString()

/databricks/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py in get_return_value(answer, gateway_client, target_id, name)
326 raise Py4JJavaError(
327 "An error occurred while calling {0}{1}{2}.\n".
--> 328 format(target_id, ".", name), value)
329 else:
330 raise Py4JError(
Py4JJavaError: An error occurred while calling o1189.collectToPython.
: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
SortAggregate(key=[post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230], functions=[finalmerge_max(merge max#7297) AS max(columnValue#7212)#7262, finalmerge_max(merge max#7299) AS max(ExistOnEventList#7231)#7264], output=[post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, columnValue#7263, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230, ExistOnEventList#7265])
+- *(4) Sort [post_visid_high#1403 ASC NULLS FIRST, post_visid_low#1404 ASC NULLS FIRST, hitid_high#1198 ASC NULLS FIRST, hitid_low#1199 ASC NULLS FIRST, visit_num#1438 ASC NULLS FIRST, visit_page_num#1439 ASC NULLS FIRST, visit_start_time_gmt#1444 ASC NULLS FIRST, date_time#1182 ASC NULLS FIRST, EventNumber#2355 ASC NULLS FIRST, columnName#7128 ASC NULLS FIRST, ExistOnHit#7130 ASC NULLS FIRST, BizEvarPropEventNumberKey#2354 ASC NULLS FIRST, EventDecimal#7230 ASC NULLS FIRST], false, 0
+- Exchange hashpartitioning(post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230, 200)
+- SortAggregate(key=[post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230], functions=[partial_max(columnValue#7212) AS max#7297, partial_max(ExistOnEventList#7231) AS max#7299], output=[post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230, max#7297, max#7299])
+- *(3) Sort [post_visid_high#1403 ASC NULLS FIRST, post_visid_low#1404 ASC NULLS FIRST, hitid_high#1198 ASC NULLS FIRST, hitid_low#1199 ASC NULLS FIRST, visit_num#1438 ASC NULLS FIRST, visit_page_num#1439 ASC NULLS FIRST, visit_start_time_gmt#1444 ASC NULLS FIRST, date_time#1182 ASC NULLS FIRST, EventNumber#2355 ASC NULLS FIRST, columnName#7128 ASC NULLS FIRST, ExistOnHit#7130 ASC NULLS FIRST, BizEvarPropEventNumberKey#2354 ASC NULLS FIRST, EventDecimal#7230 ASC NULLS FIRST], false, 0
+- *(3) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, columnValue#7212, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, coalesce(cast(EventDecimal#7131 as int), 0) AS EventDecimal#7230, coalesce(ExistOnEventList#7132, 0) AS ExistOnEventList#7231]
+- *(3) HashAggregate(keys=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404], functions=[], output=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404])
+- Exchange hashpartitioning(ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404, 200)
+- *(2) HashAggregate(keys=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404], functions=[], output=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404])
+- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2340, EventDecimal#2341, ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, columnName#7128, columnValue#8155, ExistOnHit#8156, EventNumber#2355, CASE WHEN isnotnull(EventDecimal#2341) THEN EventDecimal#2341 ELSE 0 END AS EventDecimal#7131]
+- *(2) BroadcastHashJoin [cast(EventNumber#2340 as int)], [EventNumber#2355], Inner, BuildRight, false
:- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, substring_index(EventNumber#2328, =, 1) AS EventNumber#2340, cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0)) AS EventDecimal#2341]
: +- *(2) Filter (NOT (cast(coalesce(cast(CASE WHEN isnotnull(cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0))) THEN cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0)) ELSE 0 END as int), 0) as string) = 0) && isnotnull(substring_index(EventNumber#2328, =, 1)))
: +- *(2) Generate explode(split(post_event_list#1298, ,)), [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182], false, [EventNumber#2328]
: +- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, post_event_list#1298]
: +- *(2) FileScan csv [date_time#1182,hitid_high#1198,hitid_low#1199,post_event_list#1298,post_visid_high#1403,post_visid_low#1404,visit_num#1438,visit_page_num#1439,visit_start_time_gmt#1444] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[dbfs:/mnt/dev-raw/sitecat/hit_data/2019/01/21], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<date_time:string,hitid_high:string,hitid_low:string,post_event_list:string,post_visid_high...
+- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[5, int, false] as bigint)))
+- *(1) HashAggregate(keys=[1 AS ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, EventEvarPropName#2356 AS columnName#7128, null AS columnValue#8155, 1 AS ExistOnHit#8156, EventNumber#2355], functions=[], output=[ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, columnName#7128, columnValue#8155, ExistOnHit#8156, EventNumber#2355])
+- *(1) Project [BizEvarPropEventNumberKey#2354 AS BizEvarPropEventNumberKey#2354, EventNumber#2355 AS EventNumber#2355, EventEvarPropName#2356 AS EventEvarPropName#2356]
+- *(1) Scan SqlDWRelation((SELECT * FROM (SELECT [...] pEventNumberKey" > 1))) q} ) [BizEvarPropEventNumberKey#2354,EventNumber#2355,EventEvarPropName#2356] PushedFilters: [], ReadSchema: struct<BizEvarPropEventNumberKey:int,EventNumber:int,EventEvarPropName:string>

at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.aggregate.SortAggregateExec.doExecute(SortAggregateExec.scala:93)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:143)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:131)
at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:66)
at org.apache.spark.sql.execution.collect.Collector$.collect(Collector.scala:75)
at org.apache.spark.sql.execution.ResultCacheManager.getOrComputeResult(ResultCacheManager.scala:497)
at org.apache.spark.sql.execution.CollectLimitExec.executeCollectResult(limit.scala:48)
at org.apache.spark.sql.Dataset$$anonfun$51.apply(Dataset.scala:3289)
at org.apache.spark.sql.Dataset$$anonfun$51.apply(Dataset.scala:3288)
at org.apache.spark.sql.Dataset$$anonfun$55.apply(Dataset.scala:3423)
at org.apache.spark.sql.execution.SQLExecution$$anonfun$withCustomExecutionEnv$1.apply(SQLExecution.scala:99)
at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:228)
at org.apache.spark.sql.execution.SQLExecution$.withCustomExecutionEnv(SQLExecution.scala:85)
at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:158)
at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withAction(Dataset.scala:3422)
at org.apache.spark.sql.Dataset.collectToPython(Dataset.scala:3288)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:380)
at py4j.Gateway.invoke(Gateway.java:295)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:251)
at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange hashpartitioning(post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230, 200)
+- SortAggregate(key=[post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230], functions=[partial_max(columnValue#7212) AS max#7297, partial_max(ExistOnEventList#7231) AS max#7299], output=[post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230, max#7297, max#7299])
+- *(3) Sort [post_visid_high#1403 ASC NULLS FIRST, post_visid_low#1404 ASC NULLS FIRST, hitid_high#1198 ASC NULLS FIRST, hitid_low#1199 ASC NULLS FIRST, visit_num#1438 ASC NULLS FIRST, visit_page_num#1439 ASC NULLS FIRST, visit_start_time_gmt#1444 ASC NULLS FIRST, date_time#1182 ASC NULLS FIRST, EventNumber#2355 ASC NULLS FIRST, columnName#7128 ASC NULLS FIRST, ExistOnHit#7130 ASC NULLS FIRST, BizEvarPropEventNumberKey#2354 ASC NULLS FIRST, EventDecimal#7230 ASC NULLS FIRST], false, 0
+- *(3) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, columnValue#7212, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, coalesce(cast(EventDecimal#7131 as int), 0) AS EventDecimal#7230, coalesce(ExistOnEventList#7132, 0) AS ExistOnEventList#7231]
+- *(3) HashAggregate(keys=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404], functions=[], output=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404])
+- Exchange hashpartitioning(ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404, 200)
+- *(2) HashAggregate(keys=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404], functions=[], output=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404])
+- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2340, EventDecimal#2341, ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, columnName#7128, columnValue#8155, ExistOnHit#8156, EventNumber#2355, CASE WHEN isnotnull(EventDecimal#2341) THEN EventDecimal#2341 ELSE 0 END AS EventDecimal#7131]
+- *(2) BroadcastHashJoin [cast(EventNumber#2340 as int)], [EventNumber#2355], Inner, BuildRight, false
:- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, substring_index(EventNumber#2328, =, 1) AS EventNumber#2340, cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0)) AS EventDecimal#2341]
: +- *(2) Filter (NOT (cast(coalesce(cast(CASE WHEN isnotnull(cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0))) THEN cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0)) ELSE 0 END as int), 0) as string) = 0) && isnotnull(substring_index(EventNumber#2328, =, 1)))
: +- *(2) Generate explode(split(post_event_list#1298, ,)), [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182], false, [EventNumber#2328]
: +- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, post_event_list#1298]
: +- *(2) FileScan csv [date_time#1182,hitid_high#1198,hitid_low#1199,post_event_list#1298,post_visid_high#1403,post_visid_low#1404,visit_num#1438,visit_page_num#1439,visit_start_time_gmt#1444] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[dbfs:/mnt/dev-raw/sitecat/hit_data/2019/01/21], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<date_time:string,hitid_high:string,hitid_low:string,post_event_list:string,post_visid_high...
+- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[5, int, false] as bigint)))
+- *(1) HashAggregate(keys=[1 AS ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, EventEvarPropName#2356 AS columnName#7128, null AS columnValue#8155, 1 AS ExistOnHit#8156, EventNumber#2355], functions=[], output=[ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, columnName#7128, columnValue#8155, ExistOnHit#8156, EventNumber#2355])
+- *(1) Project [BizEvarPropEventNumberKey#2354 AS BizEvarPropEventNumberKey#2354, EventNumber#2355 AS EventNumber#2355, EventEvarPropName#2356 AS EventEvarPropName#2356]
+- *(1) Scan SqlDWRelation((SELECT * FROM (SELECT [...] pEventNumberKey" > 1))) q} ) [BizEvarPropEventNumberKey#2354,EventNumber#2355,EventEvarPropName#2356] PushedFilters: [], ReadSchema: struct<BizEvarPropEventNumberKey:int,EventNumber:int,EventEvarPropName:string>

at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:119)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:143)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:131)
at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:389)
at org.apache.spark.sql.execution.SortExec.inputRDDs(SortExec.scala:121)
at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:625)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:143)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:131)
at org.apache.spark.sql.execution.aggregate.SortAggregateExec$$anonfun$doExecute$1.apply(SortAggregateExec.scala:95)
at org.apache.spark.sql.execution.aggregate.SortAggregateExec$$anonfun$doExecute$1.apply(SortAggregateExec.scala:93)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 31 more
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
SortAggregate(key=[post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230], functions=[partial_max(columnValue#7212) AS max#7297, partial_max(ExistOnEventList#7231) AS max#7299], output=[post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2355, columnName#7128, ExistOnHit#7130, BizEvarPropEventNumberKey#2354, EventDecimal#7230, max#7297, max#7299])

*** WARNING: skipped 896 bytes of output ***

+- *(3) HashAggregate(keys=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404], functions=[], output=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404])
+- Exchange hashpartitioning(ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404, 200)
+- *(2) HashAggregate(keys=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404], functions=[], output=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404])
+- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2340, EventDecimal#2341, ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, columnName#7128, columnValue#8155, ExistOnHit#8156, EventNumber#2355, CASE WHEN isnotnull(EventDecimal#2341) THEN EventDecimal#2341 ELSE 0 END AS EventDecimal#7131]
+- *(2) BroadcastHashJoin [cast(EventNumber#2340 as int)], [EventNumber#2355], Inner, BuildRight, false
:- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, substring_index(EventNumber#2328, =, 1) AS EventNumber#2340, cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0)) AS EventDecimal#2341]
: +- *(2) Filter (NOT (cast(coalesce(cast(CASE WHEN isnotnull(cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0))) THEN cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0)) ELSE 0 END as int), 0) as string) = 0) && isnotnull(substring_index(EventNumber#2328, =, 1)))
: +- *(2) Generate explode(split(post_event_list#1298, ,)), [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182], false, [EventNumber#2328]
: +- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, post_event_list#1298]
: +- *(2) FileScan csv [date_time#1182,hitid_high#1198,hitid_low#1199,post_event_list#1298,post_visid_high#1403,post_visid_low#1404,visit_num#1438,visit_page_num#1439,visit_start_time_gmt#1444] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[dbfs:/mnt/dev-raw/sitecat/hit_data/2019/01/21], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<date_time:string,hitid_high:string,hitid_low:string,post_event_list:string,post_visid_high...
+- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[5, int, false] as bigint)))
+- *(1) HashAggregate(keys=[1 AS ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, EventEvarPropName#2356 AS columnName#7128, null AS columnValue#8155, 1 AS ExistOnHit#8156, EventNumber#2355], functions=[], output=[ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, columnName#7128, columnValue#8155, ExistOnHit#8156, EventNumber#2355])
+- *(1) Project [BizEvarPropEventNumberKey#2354 AS BizEvarPropEventNumberKey#2354, EventNumber#2355 AS EventNumber#2355, EventEvarPropName#2356 AS EventEvarPropName#2356]
+- *(1) Scan SqlDWRelation((SELECT * FROM (SELECT [...] pEventNumberKey" > 1))) q} ) [BizEvarPropEventNumberKey#2354,EventNumber#2355,EventEvarPropName#2356] PushedFilters: [], ReadSchema: struct<BizEvarPropEventNumberKey:int,EventNumber:int,EventEvarPropName:string>

at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.aggregate.SortAggregateExec.doExecute(SortAggregateExec.scala:93)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:143)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:131)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.prepareShuffleDependency(ShuffleExchangeExec.scala:92)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$$anonfun$doExecute$1.apply(ShuffleExchangeExec.scala:128)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$$anonfun$doExecute$1.apply(ShuffleExchangeExec.scala:119)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 50 more
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange hashpartitioning(ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404, 200)
+- *(2) HashAggregate(keys=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404], functions=[], output=[ExistOnEventList#7132, BizEvarPropEventNumberKey#2354, EventDecimal#7131, columnName#7128, hitid_low#1199, visit_num#1438, visit_start_time_gmt#1444, visit_page_num#1439, post_visid_high#1403, columnValue#7212, date_time#1182, ExistOnHit#7130, EventNumber#2355, hitid_high#1198, post_visid_low#1404])
+- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, EventNumber#2340, EventDecimal#2341, ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, columnName#7128, columnValue#8155, ExistOnHit#8156, EventNumber#2355, CASE WHEN isnotnull(EventDecimal#2341) THEN EventDecimal#2341 ELSE 0 END AS EventDecimal#7131]
+- *(2) BroadcastHashJoin [cast(EventNumber#2340 as int)], [EventNumber#2355], Inner, BuildRight, false
:- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, substring_index(EventNumber#2328, =, 1) AS EventNumber#2340, cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0)) AS EventDecimal#2341]
: +- *(2) Filter (NOT (cast(coalesce(cast(CASE WHEN isnotnull(cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0))) THEN cast(ephemeralsubstring(EventNumber#2328, (length(substring_index(EventNumber#2328, =, -1)) + 1), 50) as decimal(10,0)) ELSE 0 END as int), 0) as string) = 0) && isnotnull(substring_index(EventNumber#2328, =, 1)))
: +- *(2) Generate explode(split(post_event_list#1298, ,)), [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182], false, [EventNumber#2328]
: +- *(2) Project [post_visid_high#1403, post_visid_low#1404, hitid_high#1198, hitid_low#1199, visit_num#1438, visit_page_num#1439, visit_start_time_gmt#1444, date_time#1182, post_event_list#1298]
: +- *(2) FileScan csv [date_time#1182,hitid_high#1198,hitid_low#1199,post_event_list#1298,post_visid_high#1403,post_visid_low#1404,visit_num#1438,visit_page_num#1439,visit_start_time_gmt#1444] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[dbfs:/mnt/dev-raw/sitecat/hit_data/2019/01/21], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<date_time:string,hitid_high:string,hitid_low:string,post_event_list:string,post_visid_high...
+- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[5, int, false] as bigint)))
+- *(1) HashAggregate(keys=[1 AS ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, EventEvarPropName#2356 AS columnName#7128, null AS columnValue#8155, 1 AS ExistOnHit#8156, EventNumber#2355], functions=[], output=[ExistOnEventList#8154, BizEvarPropEventNumberKey#2354, columnName#7128, columnValue#8155, ExistOnHit#8156, EventNumber#2355])
+- *(1) Project [BizEvarPropEventNumberKey#2354 AS BizEvarPropEventNumberKey#2354, EventNumber#2355 AS EventNumber#2355, EventEvarPropName#2356 AS EventEvarPropName#2356]
+- *(1) Scan SqlDWRelation((SELECT * FROM (SELECT [...] pEventNumberKey" > 1))) q} ) [BizEvarPropEventNumberKey#2354,EventNumber#2355,EventEvarPropName#2356] PushedFilters: [], ReadSchema: struct<BizEvarPropEventNumberKey:int,EventNumber:int,EventEvarPropName:string>

at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:119)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:143)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:131)
at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:389)
at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:198)
at org.apache.spark.sql.execution.ProjectExec.inputRDDs(basicPhysicalOperators.scala:45)
at org.apache.spark.sql.execution.SortExec.inputRDDs(SortExec.scala:121)
at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:625)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:143)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:131)
at org.apache.spark.sql.execution.aggregate.SortAggregateExec$$anonfun$doExecute$1.apply(SortAggregateExec.scala:95)
at org.apache.spark.sql.execution.aggregate.SortAggregateExec$$anonfun$doExecute$1.apply(SortAggregateExec.scala:93)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 61 more
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: Binding attribute, tree: ExistOnEventList#7132
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:79)
at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1.applyOrElse(BoundAttribute.scala:78)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:277)
at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$2.apply(TreeNode.scala:277)
at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:77)
at org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:276)
at org.apache.spark.sql.catalyst.trees.TreeNode.transform(TreeNode.scala:266)
at org.apache.spark.sql.catalyst.expressions.BindReferences$.bindReference(BoundAttribute.scala:78)
at org.apache.spark.sql.execution.aggregate.HashAggregateExec$$anonfun$55.apply(HashAggregateExec.scala:1031)
at org.apache.spark.sql.execution.aggregate.HashAggregateExec$$anonfun$55.apply(HashAggregateExec.scala:1031)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.AbstractTraversable.map(Traversable.scala:104)
at org.apache.spark.sql.execution.aggregate.HashAggregateExec.doConsumeWithKeys(HashAggregateExec.scala:1031)
at org.apache.spark.sql.execution.aggregate.HashAggregateExec.doConsume(HashAggregateExec.scala:224)
at org.apache.spark.sql.execution.CodegenSupport$class.constructDoConsumeFunction(WholeStageCodegenExec.scala:214)
at org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:185)
at org.apache.spark.sql.execution.ProjectExec.consume(basicPhysicalOperators.scala:39)
at org.apache.spark.sql.execution.ProjectExec.doConsume(basicPhysicalOperators.scala:75)
at org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:187)
at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.consume(BroadcastHashJoinExec.scala:45)
at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.codegenInner(BroadcastHashJoinExec.scala:262)
at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.doConsume(BroadcastHashJoinExec.scala:132)
at org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:187)
at org.apache.spark.sql.execution.ProjectExec.consume(basicPhysicalOperators.scala:39)
at org.apache.spark.sql.execution.ProjectExec.doConsume(basicPhysicalOperators.scala:75)
at org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:187)
at org.apache.spark.sql.execution.FilterExec.consume(basicPhysicalOperators.scala:107)
at org.apache.spark.sql.execution.FilterExec.doConsume(basicPhysicalOperators.scala:246)
at org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:187)
at org.apache.spark.sql.execution.GenerateExec.consume(GenerateExec.scala:58)
at org.apache.spark.sql.execution.GenerateExec.codeGenCollection(GenerateExec.scala:242)
at org.apache.spark.sql.execution.GenerateExec.doConsume(GenerateExec.scala:154)
at org.apache.spark.sql.execution.CodegenSupport$class.constructDoConsumeFunction(WholeStageCodegenExec.scala:214)
at org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:185)
at org.apache.spark.sql.execution.ProjectExec.consume(basicPhysicalOperators.scala:39)
at org.apache.spark.sql.execution.ProjectExec.doConsume(basicPhysicalOperators.scala:75)
at org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:187)
at org.apache.spark.sql.execution.FileSourceScanExec.consume(DataSourceScanExec.scala:174)
at org.apache.spark.sql.execution.FileSourceScanExec.doProduce(DataSourceScanExec.scala:816)
at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:90)
at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:85)
at org.apache.spark.sql.execution.FileSourceScanExec.produce(DataSourceScanExec.scala:174)
at org.apache.spark.sql.execution.ProjectExec.doProduce(basicPhysicalOperators.scala:49)
at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:90)
at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:85)
at org.apache.spark.sql.execution.ProjectExec.produce(basicPhysicalOperators.scala:39)
at org.apache.spark.sql.execution.GenerateExec.doProduce(GenerateExec.scala:141)
at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:90)
at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:85)
at org.apache.spark.sql.execution.GenerateExec.produce(GenerateExec.scala:58)
at org.apache.spark.sql.execution.FilterExec.doProduce(basicPhysicalOperators.scala:147)
at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:90)
at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:85)
at org.apache.spark.sql.execution.FilterExec.produce(basicPhysicalOperators.scala:107)
at org.apache.spark.sql.execution.ProjectExec.doProduce(basicPhysicalOperators.scala:49)
at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:90)
at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:85)
at org.apache.spark.sql.execution.ProjectExec.produce(basicPhysicalOperators.scala:39)
at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.doProduce(BroadcastHashJoinExec.scala:127)
at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:90)
at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:85)
at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.produce(BroadcastHashJoinExec.scala:45)
at org.apache.spark.sql.execution.ProjectExec.doProduce(basicPhysicalOperators.scala:49)
at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:90)
at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:85)
at org.apache.spark.sql.execution.ProjectExec.produce(basicPhysicalOperators.scala:39)
at org.apache.spark.sql.execution.aggregate.HashAggregateExec.doProduceWithKeys(HashAggregateExec.scala:919)
at org.apache.spark.sql.execution.aggregate.HashAggregateExec.doProduce(HashAggregateExec.scala:216)
at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:90)
at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:85)
at org.apache.spark.sql.execution.aggregate.HashAggregateExec.produce(HashAggregateExec.scala:43)
at org.apache.spark.sql.execution.WholeStageCodegenExec.doCodeGen(WholeStageCodegenExec.scala:542)
at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:596)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:143)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:131)
at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$5.apply(SparkPlan.scala:183)
at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:180)
at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:131)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.prepareShuffleDependency(ShuffleExchangeExec.scala:92)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$$anonfun$doExecute$1.apply(ShuffleExchangeExec.scala:128)
at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$$anonfun$doExecute$1.apply(ShuffleExchangeExec.scala:119)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
... 82 more
Caused by: java.lang.RuntimeException: Couldn't find ExistOnEventList#7132 in [post_visid_high#1403,post_visid_low#1404,hitid_high#1198,hitid_low#1199,visit_num#1438,visit_page_num#1439,visit_start_time_gmt#1444,date_time#1182,EventNumber#2340,EventDecimal#2341,ExistOnEventList#8154,BizEvarPropEventNumberKey#2354,columnName#7128,columnValue#8155,ExistOnHit#8156,EventNumber#2355,EventDecimal#7131]
at scala.sys.package$.error(package.scala:27)
at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1$$anonfun$applyOrElse$1.apply(BoundAttribute.scala:85)
at org.apache.spark.sql.catalyst.expressions.BindReferences$$anonfun$bindReference$1$$anonfun$applyOrElse$1.apply(BoundAttribute.scala:79)
at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
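
The final Caused by is the actual failure: during whole-stage code generation, HashAggregateExec tries to bind the attribute ExistOnEventList#7132, but the child Project only exposes ExistOnEventList#8154, so the reference cannot be resolved. This pattern usually means a column reference captured before a join or aggregation is being reused afterwards, where it now carries a stale expression ID. A minimal PySpark sketch of two common workarounds follows; dfLoadHitDataPostEvar is the DataFrame from the failing command above, while the spark session name and both workarounds are assumptions, not a confirmed fix for this exact plan.

# Hedged sketch: force Spark to re-resolve column references when an action
# fails with "Couldn't find <attr> in [...]". Assumes a notebook-provided
# `spark` session; dfLoadHitDataPostEvar comes from the failing command.
from pyspark.sql import functions as F

# Option 1: re-alias every column so downstream operators bind against
# fresh attribute references instead of ones captured earlier in the plan.
df_rebound = dfLoadHitDataPostEvar.select(
    [F.col(c).alias(c) for c in dfLoadHitDataPostEvar.columns]
)

# Option 2: break the lineage entirely by round-tripping through an RDD;
# Spark rebuilds the logical plan from the materialized schema.
df_rebound = spark.createDataFrame(dfLoadHitDataPostEvar.rdd,
                                   dfLoadHitDataPostEvar.schema)

# Retry the original action against the rebound DataFrame.
df_rebound.where(~F.col('EventDecimal').like('0')).head(10)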