  1. File "script_2019-02-06-02-32-43.py", line 197, in <module>
  2. .parquet("s3://pinfare-glue/cleanedFlights")
  3. File "/mnt/yarn/usercache/root/appcache/application_1549418793443_0001/container_1549418793443_0001_01_000001/pyspark.zip/pyspark/sql/readwriter.py", line 691, in parquet
  4. File "/mnt/yarn/usercache/root/appcache/application_1549418793443_0001/container_1549418793443_0001_01_000001/py4j-0.10.4-src.zip/py4j/java_gateway.py", line 1133, in __call__
  5. File "/mnt/yarn/usercache/root/appcache/application_1549418793443_0001/container_1549418793443_0001_01_000001/pyspark.zip/pyspark/sql/utils.py", line 63, in deco
  6. File "/mnt/yarn/usercache/root/appcache/application_1549418793443_0001/container_1549418793443_0001_01_000001/py4j-0.10.4-src.zip/py4j/protocol.py", line 319, in get_return_value
  7. py4j.protocol.Py4JJavaError: An error occurred while calling o246.parquet.
  8. : org.apache.spark.SparkException: Job aborted.
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:213)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:166)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:166)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:166)
    at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:145)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
    at org.apache.spark.sql.execution.datasources.DataSource.writeInFileFormat(DataSource.scala:435)
    at org.apache.spark.sql.execution.datasources.DataSource.write(DataSource.scala:471)
    at org.apache.spark.sql.execution.datasources.SaveIntoDataSourceCommand.run(SaveIntoDataSourceCommand.scala:50)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
    at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
    at org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:609)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:233)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:217)
    at org.apache.spark.sql.DataFrameWriter.parquet(DataFrameWriter.scala:508)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:280)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:214)
    at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange hashpartitioning(countryName#24, querydatetime#213, 200)
+- *Project [master_key#588, master_querydatetime#589, Id#180, QueryTaskId#181, QueryOriginPlace#182, queryoutbounddate#334, queryinbounddate#375, QueryCabinClass#185, QueryCurrency#186, Agent#187, QuoteAgeInMinutes#188, Price#189, OutboundLegId#190, InboundLegId#191, outdeparture#416, outarrival#457, OutDuration#194, OutJourneyMode#195, OutStops#196, OutCarriers#197, OutOperatingCarriers#198, NumberOutStops#199, NumberOutCarriers#200, NumberOutOperatingCarriers#201, ... 33 more fields]
   +- BatchEvalPython [getInterval(cast(date_format(outdeparture#416, H, Some(Zulu)) as int), 0, 24, 4), getInterval(cast(date_format(indeparture#498, H, Some(Zulu)) as int), 0, 24, 4)], [master_key#588, master_querydatetime#589, Id#180, QueryTaskId#181, QueryOriginPlace#182, queryoutbounddate#334, queryinbounddate#375, QueryCabinClass#185, QueryCurrency#186, Agent#187, QuoteAgeInMinutes#188, Price#189, OutboundLegId#190, InboundLegId#191, outdeparture#416, outarrival#457, OutDuration#194, OutJourneyMode#195, OutStops#196, OutCarriers#197, OutOperatingCarriers#198, NumberOutStops#199, NumberOutCarriers#200, NumberOutOperatingCarriers#201, ... 21 more fields]
      +- *Sort [key#250 ASC NULLS FIRST, querydatetime#101 ASC NULLS FIRST], true, 0
         +- Exchange rangepartitioning(key#250 ASC NULLS FIRST, querydatetime#101 ASC NULLS FIRST, 200)
            +- *Project [key#250 AS master_key#588, querydatetime#101 AS master_querydatetime#589, Id#180, QueryTaskId#181, QueryOriginPlace#182, queryoutbounddate#334, queryinbounddate#375, QueryCabinClass#185, QueryCurrency#186, Agent#187, QuoteAgeInMinutes#188, Price#189, OutboundLegId#190, InboundLegId#191, outdeparture#416, outarrival#457, OutDuration#194, OutJourneyMode#195, OutStops#196, OutCarriers#197, OutOperatingCarriers#198, NumberOutStops#199, NumberOutCarriers#200, NumberOutOperatingCarriers#201, ... 19 more fields]
               +- *BroadcastHashJoin [key#250, querydatetime#101], [key#590, querydatetime#213], LeftOuter, BuildRight
                  :- *Project [key#250, querydatetime#101]
                  :  +- BroadcastNestedLoopJoin BuildRight, Cross
                  :     :- Generate explode(pythonUDF0#1633), false, false, [querydatetime#101]
                  :     :  +- BatchEvalPython [generate_date_series(start#94, stop#95)], [start#94, stop#95, pythonUDF0#1633]
                  :     :     +- Scan ExistingRDD[start#94,stop#95]
                  :     +- BroadcastExchange IdentityBroadcastMode
                  :        +- *HashAggregate(keys=[key#250], functions=[], output=[key#250])
                  :           +- *HashAggregate(keys=[key#250], functions=[], output=[key#250])
                  :              +- *Sample 0.0, 0.001, false, 7736333241016522154
                  :                 +- *GlobalLimit 5000000
                  :                    +- Exchange SinglePartition
                  :                       +- *LocalLimit 5000000
                  :                          +- *Project [concat(outboundlegid#190, -, inboundlegid#191, -, agent#187) AS key#250]
                  :                             +- *SortMergeJoin [querydestinationplace#212], [cast(airportId#38 as int)], Inner
                  :                                :- *Sort [querydestinationplace#212 ASC NULLS FIRST], false, 0
                  :                                :  +- Exchange hashpartitioning(querydestinationplace#212, 200)
                  :                                :     +- *Project [Agent#187, OutboundLegId#190, InboundLegId#191, querydestinationplace#212]
                  :                                :        +- *SortMergeJoin [agent#187], [id#89], Inner
                  :                                :           :- *Sort [agent#187 ASC NULLS FIRST], false, 0
                  :                                :           :  +- Exchange hashpartitioning(agent#187, 200)
                  :                                :           :     +- *Project [Agent#187, OutboundLegId#190, InboundLegId#191, querydestinationplace#212]
                  :                                :           :        +- *Filter (isnotnull(agent#187) && isnotnull(querydestinationplace#212))
                  :                                :           :           +- Scan ExistingRDD[Id#180,QueryTaskId#181,QueryOriginPlace#182,QueryOutboundDate#183,QueryInboundDate#184,QueryCabinClass#185,QueryCurrency#186,Agent#187,QuoteAgeInMinutes#188,Price#189,OutboundLegId#190,InboundLegId#191,OutDeparture#192,OutArrival#193,OutDuration#194,OutJourneyMode#195,OutStops#196,OutCarriers#197,OutOperatingCarriers#198,NumberOutStops#199,NumberOutCarriers#200,NumberOutOperatingCarriers#201,InDeparture#202,InArrival#203,... 10 more fields]
                  :                                :           +- *Sort [id#89 ASC NULLS FIRST], false, 0
                  :                                :              +- Exchange hashpartitioning(id#89, 200)
                  :                                :                 +- *Project [cast(id#67L as string) AS id#89]
                  :                                :                    +- *Filter (isnotnull(id#67L) && isnotnull(cast(id#67L as string)))
                  :                                :                       +- Scan ExistingRDD[id#67L,name#68,imageurl#69,status#70,optimisedformobile#71,type#72,bookingnumber#73,createdat#74,updatedat#75]
                  :                                +- *Sort [cast(airportId#38 as int) ASC NULLS FIRST], false, 0
                  :                                   +- Exchange hashpartitioning(cast(airportId#38 as int), 200)
                  :                                      +- *Project [cast(airportId#18L as string) AS airportId#38]
                  :                                         +- *Filter (isnotnull(airportId#18L) && isnotnull(cast(airportId#18L as string)))
                  :                                            +- Scan ExistingRDD[airportId#18L,cityId#19L,countryId#20L,airportCode#21,airportName#22,cityName#23,countryName#24]
                  +- BroadcastExchange HashedRelationBroadcastMode(List(input[34, string, true], input[33, date, true]))
                     +- *Project [Id#180, QueryTaskId#181, QueryOriginPlace#182, cast(QueryOutboundDate#183 as date) AS queryoutbounddate#334, cast(QueryInboundDate#184 as date) AS queryinbounddate#375, QueryCabinClass#185, QueryCurrency#186, Agent#187, QuoteAgeInMinutes#188, Price#189, OutboundLegId#190, InboundLegId#191, cast(unix_timestamp(OutDeparture#192, yyyy-MM-dd'T'HH:mm:ss, Some(Zulu)) as timestamp) AS outdeparture#416, cast(unix_timestamp(OutArrival#193, yyyy-MM-dd'T'HH:mm:ss, Some(Zulu)) as timestamp) AS outarrival#457, OutDuration#194, OutJourneyMode#195, OutStops#196, OutCarriers#197, OutOperatingCarriers#198, NumberOutStops#199, NumberOutCarriers#200, NumberOutOperatingCarriers#201, cast(unix_timestamp(InDeparture#202, yyyy-MM-dd'T'HH:mm:ss, Some(Zulu)) as timestamp) AS indeparture#498, cast(unix_timestamp(InArrival#203, yyyy-MM-dd'T'HH:mm:ss, Some(Zulu)) as timestamp) AS inarrival#539, ... 15 more fields]
                        +- *Sample 0.0, 0.001, false, 7736333241016522154
                           +- *GlobalLimit 5000000
                              +- Exchange SinglePartition
                                 +- *LocalLimit 5000000
                                    +- *Project [Id#180, QueryTaskId#181, QueryOriginPlace#182, QueryOutboundDate#183, QueryInboundDate#184, QueryCabinClass#185, QueryCurrency#186, Agent#187, QuoteAgeInMinutes#188, Price#189, OutboundLegId#190, InboundLegId#191, OutDeparture#192, OutArrival#193, OutDuration#194, OutJourneyMode#195, OutStops#196, OutCarriers#197, OutOperatingCarriers#198, NumberOutStops#199, NumberOutCarriers#200, NumberOutOperatingCarriers#201, InDeparture#202, InArrival#203, ... 15 more fields]
                                       +- *SortMergeJoin [querydestinationplace#212], [cast(airportId#38 as int)], Inner
                                          :- *Sort [querydestinationplace#212 ASC NULLS FIRST], false, 0
                                          :  +- Exchange hashpartitioning(querydestinationplace#212, 200)
                                          :     +- *Project [Id#180, QueryTaskId#181, QueryOriginPlace#182, QueryOutboundDate#183, QueryInboundDate#184, QueryCabinClass#185, QueryCurrency#186, Agent#187, QuoteAgeInMinutes#188, Price#189, OutboundLegId#190, InboundLegId#191, OutDeparture#192, OutArrival#193, OutDuration#194, OutJourneyMode#195, OutStops#196, OutCarriers#197, OutOperatingCarriers#198, NumberOutStops#199, NumberOutCarriers#200, NumberOutOperatingCarriers#201, InDeparture#202, InArrival#203, ... 11 more fields]
                                          :        +- *SortMergeJoin [Agent#187], [id#89], Inner
                                          :           :- *Sort [Agent#187 ASC NULLS FIRST], false, 0
                                          :           :  +- Exchange hashpartitioning(Agent#187, 200)
                                          :           :     +- *Filter (isnotnull(Agent#187) && isnotnull(querydestinationplace#212))
                                          :           :        +- Scan ExistingRDD[Id#180,QueryTaskId#181,QueryOriginPlace#182,QueryOutboundDate#183,QueryInboundDate#184,QueryCabinClass#185,QueryCurrency#186,Agent#187,QuoteAgeInMinutes#188,Price#189,OutboundLegId#190,InboundLegId#191,OutDeparture#192,OutArrival#193,OutDuration#194,OutJourneyMode#195,OutStops#196,OutCarriers#197,OutOperatingCarriers#198,NumberOutStops#199,NumberOutCarriers#200,NumberOutOperatingCarriers#201,InDeparture#202,InArrival#203,... 10 more fields]
                                          :           +- *Sort [id#89 ASC NULLS FIRST], false, 0
                                          :              +- Exchange hashpartitioning(id#89, 200)
                                          :                 +- *Project [cast(id#67L as string) AS id#89, name#68]
                                          :                    +- *Filter (isnotnull(id#67L) && isnotnull(cast(id#67L as string)))
                                          :                       +- Scan ExistingRDD[id#67L,name#68,imageurl#69,status#70,optimisedformobile#71,type#72,bookingnumber#73,createdat#74,updatedat#75]
                                          +- *Sort [cast(airportId#38 as int) ASC NULLS FIRST], false, 0
                                             +- Exchange hashpartitioning(cast(airportId#38 as int), 200)
                                                +- *Project [cast(airportId#18L as string) AS airportId#38, countryName#24, cityName#23, airportName#22]
                                                   +- *Filter (isnotnull(airportId#18L) && isnotnull(cast(airportId#18L as string)))
                                                      +- Scan ExistingRDD[airportId#18L,cityId#19L,countryId#20L,airportCode#21,airportName#22,cityName#23,countryName#24]

    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange.doExecute(ShuffleExchange.scala:115)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
    at org.apache.spark.sql.execution.SortExec.doExecute(SortExec.scala:101)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
    at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:183)
    ... 45 more
Caused by: org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange rangepartitioning(key#250 ASC NULLS FIRST, querydatetime#101 ASC NULLS FIRST, 200)
+- *Project [key#250 AS master_key#588, querydatetime#101 AS master_querydatetime#589, Id#180, QueryTaskId#181, QueryOriginPlace#182, queryoutbounddate#334, queryinbounddate#375, QueryCabinClass#185, QueryCurrency#186, Agent#187, QuoteAgeInMinutes#188, Price#189, OutboundLegId#190, InboundLegId#191, outdeparture#416, outarrival#457, OutDuration#194, OutJourneyMode#195, OutStops#196, OutCarriers#197, OutOperatingCarriers#198, NumberOutStops#199, NumberOutCarriers#200, NumberOutOperatingCarriers#201, ... 19 more fields]
   +- *BroadcastHashJoin [key#250, querydatetime#101], [key#590, querydatetime#213], LeftOuter, BuildRight
      :- *Project [key#250, querydatetime#101]
      :  +- BroadcastNestedLoopJoin BuildRight, Cross
      :     :- Generate explode(pythonUDF0#1633), false, false, [querydatetime#101]
      :     :  +- BatchEvalPython [generate_date_series(start#94, stop#95)], [start#94, stop#95, pythonUDF0#1633]
      :     :     +- Scan ExistingRDD[start#94,stop#95]
      :     +- BroadcastExchange IdentityBroadcastMode
      :        +- *HashAggregate(keys=[key#250], functions=[], output=[key#250])
      :           +- *HashAggregate(keys=[key#250], functions=[], output=[key#250])
      :              +- *Sample 0.0, 0.001, false, 7736333241016522154
      :                 +- *GlobalLimit 5000000
      :                    +- Exchange SinglePartition
      :                       +- *LocalLimit 5000000
      :                          +- *Project [concat(outboundlegid#190, -, inboundlegid#191, -, agent#187) AS key#250]
      :                             +- *SortMergeJoin [querydestinationplace#212], [cast(airportId#38 as int)], Inner
      :                                :- *Sort [querydestinationplace#212 ASC NULLS FIRST], false, 0
      :                                :  +- Exchange hashpartitioning(querydestinationplace#212, 200)
      :                                :     +- *Project [Agent#187, OutboundLegId#190, InboundLegId#191, querydestinationplace#212]
      :                                :        +- *SortMergeJoin [agent#187], [id#89], Inner
      :                                :           :- *Sort [agent#187 ASC NULLS FIRST], false, 0
      :                                :           :  +- Exchange hashpartitioning(agent#187, 200)
      :                                :           :     +- *Project [Agent#187, OutboundLegId#190, InboundLegId#191, querydestinationplace#212]
      :                                :           :        +- *Filter (isnotnull(agent#187) && isnotnull(querydestinationplace#212))
      :                                :           :           +- Scan ExistingRDD[Id#180,QueryTaskId#181,QueryOriginPlace#182,QueryOutboundDate#183,QueryInboundDate#184,QueryCabinClass#185,QueryCurrency#186,Agent#187,QuoteAgeInMinutes#188,Price#189,OutboundLegId#190,InboundLegId#191,OutDeparture#192,OutArrival#193,OutDuration#194,OutJourneyMode#195,OutStops#196,OutCarriers#197,OutOperatingCarriers#198,NumberOutStops#199,NumberOutCarriers#200,NumberOutOperatingCarriers#201,InDeparture#202,InArrival#203,... 10 more fields]
      :                                :           +- *Sort [id#89 ASC NULLS FIRST], false, 0
      :                                :              +- Exchange hashpartitioning(id#89, 200)
      :                                :                 +- *Project [cast(id#67L as string) AS id#89]
      :                                :                    +- *Filter (isnotnull(id#67L) && isnotnull(cast(id#67L as string)))
      :                                :                       +- Scan ExistingRDD[id#67L,name#68,imageurl#69,status#70,optimisedformobile#71,type#72,bookingnumber#73,createdat#74,updatedat#75]
      :                                +- *Sort [cast(airportId#38 as int) ASC NULLS FIRST], false, 0
      :                                   +- Exchange hashpartitioning(cast(airportId#38 as int), 200)
      :                                      +- *Project [cast(airportId#18L as string) AS airportId#38]
      :                                         +- *Filter (isnotnull(airportId#18L) && isnotnull(cast(airportId#18L as string)))
      :                                            +- Scan ExistingRDD[airportId#18L,cityId#19L,countryId#20L,airportCode#21,airportName#22,cityName#23,countryName#24]
      +- BroadcastExchange HashedRelationBroadcastMode(List(input[34, string, true], input[33, date, true]))
         +- *Project [Id#180, QueryTaskId#181, QueryOriginPlace#182, cast(QueryOutboundDate#183 as date) AS queryoutbounddate#334, cast(QueryInboundDate#184 as date) AS queryinbounddate#375, QueryCabinClass#185, QueryCurrency#186, Agent#187, QuoteAgeInMinutes#188, Price#189, OutboundLegId#190, InboundLegId#191, cast(unix_timestamp(OutDeparture#192, yyyy-MM-dd'T'HH:mm:ss, Some(Zulu)) as timestamp) AS outdeparture#416, cast(unix_timestamp(OutArrival#193, yyyy-MM-dd'T'HH:mm:ss, Some(Zulu)) as timestamp) AS outarrival#457, OutDuration#194, OutJourneyMode#195, OutStops#196, OutCarriers#197, OutOperatingCarriers#198, NumberOutStops#199, NumberOutCarriers#200, NumberOutOperatingCarriers#201, cast(unix_timestamp(InDeparture#202, yyyy-MM-dd'T'HH:mm:ss, Some(Zulu)) as timestamp) AS indeparture#498, cast(unix_timestamp(InArrival#203, yyyy-MM-dd'T'HH:mm:ss, Some(Zulu)) as timestamp) AS inarrival#539, ... 15 more fields]
            +- *Sample 0.0, 0.001, false, 7736333241016522154
               +- *GlobalLimit 5000000
                  +- Exchange SinglePartition
                     +- *LocalLimit 5000000
                        +- *Project [Id#180, QueryTaskId#181, QueryOriginPlace#182, QueryOutboundDate#183, QueryInboundDate#184, QueryCabinClass#185, QueryCurrency#186, Agent#187, QuoteAgeInMinutes#188, Price#189, OutboundLegId#190, InboundLegId#191, OutDeparture#192, OutArrival#193, OutDuration#194, OutJourneyMode#195, OutStops#196, OutCarriers#197, OutOperatingCarriers#198, NumberOutStops#199, NumberOutCarriers#200, NumberOutOperatingCarriers#201, InDeparture#202, InArrival#203, ... 15 more fields]
                           +- *SortMergeJoin [querydestinationplace#212], [cast(airportId#38 as int)], Inner
                              :- *Sort [querydestinationplace#212 ASC NULLS FIRST], false, 0
                              :  +- Exchange hashpartitioning(querydestinationplace#212, 200)
                              :     +- *Project [Id#180, QueryTaskId#181, QueryOriginPlace#182, QueryOutboundDate#183, QueryInboundDate#184, QueryCabinClass#185, QueryCurrency#186, Agent#187, QuoteAgeInMinutes#188, Price#189, OutboundLegId#190, InboundLegId#191, OutDeparture#192, OutArrival#193, OutDuration#194, OutJourneyMode#195, OutStops#196, OutCarriers#197, OutOperatingCarriers#198, NumberOutStops#199, NumberOutCarriers#200, NumberOutOperatingCarriers#201, InDeparture#202, InArrival#203, ... 11 more fields]
                              :        +- *SortMergeJoin [Agent#187], [id#89], Inner
                              :           :- *Sort [Agent#187 ASC NULLS FIRST], false, 0
                              :           :  +- Exchange hashpartitioning(Agent#187, 200)
                              :           :     +- *Filter (isnotnull(Agent#187) && isnotnull(querydestinationplace#212))
                              :           :        +- Scan ExistingRDD[Id#180,QueryTaskId#181,QueryOriginPlace#182,QueryOutboundDate#183,QueryInboundDate#184,QueryCabinClass#185,QueryCurrency#186,Agent#187,QuoteAgeInMinutes#188,Price#189,OutboundLegId#190,InboundLegId#191,OutDeparture#192,OutArrival#193,OutDuration#194,OutJourneyMode#195,OutStops#196,OutCarriers#197,OutOperatingCarriers#198,NumberOutStops#199,NumberOutCarriers#200,NumberOutOperatingCarriers#201,InDeparture#202,InArrival#203,... 10 more fields]
                              :           +- *Sort [id#89 ASC NULLS FIRST], false, 0
                              :              +- Exchange hashpartitioning(id#89, 200)
                              :                 +- *Project [cast(id#67L as string) AS id#89, name#68]
                              :                    +- *Filter (isnotnull(id#67L) && isnotnull(cast(id#67L as string)))
                              :                       +- Scan ExistingRDD[id#67L,name#68,imageurl#69,status#70,optimisedformobile#71,type#72,bookingnumber#73,createdat#74,updatedat#75]
                              +- *Sort [cast(airportId#38 as int) ASC NULLS FIRST], false, 0
                                 +- Exchange hashpartitioning(cast(airportId#38 as int), 200)
                                    +- *Project [cast(airportId#18L as string) AS airportId#38, countryName#24, cityName#23, airportName#22]
                                       +- *Filter (isnotnull(airportId#18L) && isnotnull(cast(airportId#18L as string)))
                                          +- Scan ExistingRDD[airportId#18L,cityId#19L,countryId#20L,airportCode#21,airportName#22,cityName#23,countryName#24]

    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange.doExecute(ShuffleExchange.scala:115)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
    at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:252)
    at org.apache.spark.sql.execution.SortExec.inputRDDs(SortExec.scala:121)
    at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:386)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
    at org.apache.spark.sql.execution.python.BatchEvalPythonExec.doExecute(BatchEvalPythonExec.scala:83)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
    at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:252)
    at org.apache.spark.sql.execution.ProjectExec.inputRDDs(basicPhysicalOperators.scala:42)
    at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:386)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange.prepareShuffleDependency(ShuffleExchange.scala:88)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:124)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:115)
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
    ... 60 more
Caused by: java.util.concurrent.TimeoutException: Futures timed out after [300 seconds]
    at scala.concurrent.impl.Promise$DefaultPromise.ready(Promise.scala:219)
    at scala.concurrent.impl.Promise$DefaultPromise.result(Promise.scala:223)
    at org.apache.spark.util.ThreadUtils$.awaitResult(ThreadUtils.scala:201)
    at org.apache.spark.sql.execution.exchange.BroadcastExchangeExec.doExecuteBroadcast(BroadcastExchangeExec.scala:123)
    at org.apache.spark.sql.execution.InputAdapter.doExecuteBroadcast(WholeStageCodegenExec.scala:248)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeBroadcast$1.apply(SparkPlan.scala:127)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeBroadcast$1.apply(SparkPlan.scala:127)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.SparkPlan.executeBroadcast(SparkPlan.scala:126)
    at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.prepareBroadcast(BroadcastHashJoinExec.scala:98)
    at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.codegenOuter(BroadcastHashJoinExec.scala:242)
    at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.doConsume(BroadcastHashJoinExec.scala:83)
    at org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:155)
    at org.apache.spark.sql.execution.ProjectExec.consume(basicPhysicalOperators.scala:36)
    at org.apache.spark.sql.execution.ProjectExec.doConsume(basicPhysicalOperators.scala:68)
    at org.apache.spark.sql.execution.CodegenSupport$class.consume(WholeStageCodegenExec.scala:155)
    at org.apache.spark.sql.execution.InputAdapter.consume(WholeStageCodegenExec.scala:235)
    at org.apache.spark.sql.execution.InputAdapter.doProduce(WholeStageCodegenExec.scala:263)
    at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
    at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:80)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:80)
    at org.apache.spark.sql.execution.InputAdapter.produce(WholeStageCodegenExec.scala:235)
    at org.apache.spark.sql.execution.ProjectExec.doProduce(basicPhysicalOperators.scala:46)
    at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
    at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:80)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:80)
    at org.apache.spark.sql.execution.ProjectExec.produce(basicPhysicalOperators.scala:36)
    at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.doProduce(BroadcastHashJoinExec.scala:77)
    at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
    at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:80)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:80)
    at org.apache.spark.sql.execution.joins.BroadcastHashJoinExec.produce(BroadcastHashJoinExec.scala:38)
    at org.apache.spark.sql.execution.ProjectExec.doProduce(basicPhysicalOperators.scala:46)
    at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:85)
    at org.apache.spark.sql.execution.CodegenSupport$$anonfun$produce$1.apply(WholeStageCodegenExec.scala:80)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.CodegenSupport$class.produce(WholeStageCodegenExec.scala:80)
    at org.apache.spark.sql.execution.ProjectExec.produce(basicPhysicalOperators.scala:36)
    at org.apache.spark.sql.execution.WholeStageCodegenExec.doCodeGen(WholeStageCodegenExec.scala:331)
    at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:372)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:117)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:138)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:135)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:116)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange.prepareShuffleDependency(ShuffleExchange.scala:88)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:124)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:115)
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
    ... 96 more
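
The innermost "Caused by" is the actual failure: a java.util.concurrent.TimeoutException raised from BroadcastExchangeExec after 300 seconds, which matches the default of spark.sql.broadcastTimeout. In other words, the broadcast side of the LeftOuter BroadcastHashJoin shown in the plans above did not finish building within the timeout; the outer TreeNodeException frames are just the shuffle exchanges that were waiting on it. A minimal sketch of the two usual mitigations follows, assuming the SparkSession in script_2019-02-06-02-32-43.py can be reconfigured (the app name and the 1200-second value are illustrative, not from the paste):

from pyspark.sql import SparkSession

spark = (
    SparkSession.builder
    .appName("cleanedFlights")  # hypothetical name, not from the paste
    # Option 1: give the broadcast exchange more time to materialize
    # (spark.sql.broadcastTimeout defaults to 300 seconds, the value
    # that appears in the TimeoutException above).
    .config("spark.sql.broadcastTimeout", "1200")
    # Option 2: disable automatic broadcast joins (-1) so the planner
    # falls back to a shuffle-based join such as SortMergeJoin.
    .config("spark.sql.autoBroadcastJoinThreshold", "-1")
    .getOrCreate()
)

Either setting avoids the timeout path. Disabling auto-broadcast trades the timeout risk for an extra shuffle, which tends to be the safer choice when the build side (here, a sampled, limited, and multiply-joined flights dataset) is expensive to compute.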