Spark error
NLinker, Jan 24th, 2018
Exception in thread "main" org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree:
Exchange hashpartitioning(value#28, 200)
+- *HashAggregate(keys=[value#28], functions=[], output=[value#28])
   +- Union
      :- *SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true) AS value#28]
      :  +- *MapElements <function1>, obj#27: java.lang.String
      :     +- *DeserializeToObject createexternalrow(artist_id#2.toString, StructField(artist_id,StringType,false)), obj#26: org.apache.spark.sql.Row
      :        +- Scan ExistingRDD[artist_id#2]
      +- *SerializeFromObject [staticinvoke(class org.apache.spark.unsafe.types.UTF8String, StringType, fromString, input[0, java.lang.String, true], true) AS value#33]
         +- *MapElements <function1>, obj#32: java.lang.String
            +- *DeserializeToObject createexternalrow(artist_id#14.toString, StructField(artist_id,StringType,false)), obj#31: org.apache.spark.sql.Row
               +- Scan ExistingRDD[artist_id#14]

    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange.doExecute(ShuffleExchange.scala:112)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
    at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:235)
    at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:141)
    at org.apache.spark.sql.execution.DeserializeToObjectExec.inputRDDs(objects.scala:74)
    at org.apache.spark.sql.execution.MapElementsExec.inputRDDs(objects.scala:205)
    at org.apache.spark.sql.execution.SerializeFromObjectExec.inputRDDs(objects.scala:111)
    at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:368)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
    at org.apache.spark.sql.execution.UnionExec$$anonfun$doExecute$1.apply(basicPhysicalOperators.scala:491)
    at org.apache.spark.sql.execution.UnionExec$$anonfun$doExecute$1.apply(basicPhysicalOperators.scala:491)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
    at scala.collection.AbstractTraversable.map(Traversable.scala:104)
    at org.apache.spark.sql.execution.UnionExec.doExecute(basicPhysicalOperators.scala:491)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
    at org.apache.spark.sql.execution.DeserializeToObjectExec.doExecute(objects.scala:90)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
    at org.apache.spark.sql.execution.MapElementsExec.doExecute(objects.scala:234)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
    at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
    at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:92)
    at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:92)
    at org.apache.spark.sql.Dataset.rdd$lzycompute(Dataset.scala:2570)
    at org.apache.spark.sql.Dataset.rdd(Dataset.scala:2567)
    at com.vertigo.mapping.job.SpotifyMapping$.main(SpotifyMapping.scala:65)
    at com.vertigo.mapping.job.SpotifyMapping.main(SpotifyMapping.scala)
Caused by: org.apache.hadoop.mapred.InvalidInputException: Input path does not exist: s3://vmi-music-data/spotify/20170909000000/full/spotify_artist.tsv
    at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:251)
    at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:270)
    at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:202)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.UnionRDD$$anonfun$1.apply(UnionRDD.scala:84)
    at org.apache.spark.rdd.UnionRDD$$anonfun$1.apply(UnionRDD.scala:84)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
    at scala.collection.immutable.List.foreach(List.scala:392)
    at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
    at scala.collection.immutable.List.map(List.scala:296)
    at org.apache.spark.rdd.UnionRDD.getPartitions(UnionRDD.scala:84)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.ShuffleDependency.<init>(Dependency.scala:91)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange$.prepareShuffleDependency(ShuffleExchange.scala:261)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange.prepareShuffleDependency(ShuffleExchange.scala:84)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:121)
    at org.apache.spark.sql.execution.exchange.ShuffleExchange$$anonfun$doExecute$1.apply(ShuffleExchange.scala:112)
    at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52)
    ... 54 more
Disconnected from the target VM, address: '127.0.0.1:41996', transport: 'socket'
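
What the trace says, in short: the plan at the top is a Union of two artist_id columns feeding a HashAggregate behind an Exchange, i.e. roughly a union(...).distinct(), forced to run by the Dataset.rdd call at SpotifyMapping.scala:65. The real failure is the "Caused by" at the bottom: one of the S3 input paths no longer exists, and Hadoop only discovers that lazily, when HadoopRDD.getPartitions runs while the shuffle is being set up. Below is a minimal sketch of a fail-fast guard for this, assuming the job unions artist_id from two TSV inputs; the object name, the second path, and the read options are hypothetical (the real job scans pre-built RDDs, per "Scan ExistingRDD" in the plan), and only the first path and the artist_id column come from the trace.

import java.net.URI
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.sql.SparkSession

object SpotifyMappingGuard {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("spotify-mapping-guard").getOrCreate()
    import spark.implicits._

    // The first path is the one the exception reports as missing; the second
    // input is hypothetical -- the trace only shows that two sources are unioned.
    val paths = Seq(
      "s3://vmi-music-data/spotify/20170909000000/full/spotify_artist.tsv",
      "s3://vmi-music-data/spotify/20170909000000/full/other_source.tsv"
    )

    // Fail fast with a readable error instead of letting HadoopRDD.getPartitions
    // throw InvalidInputException from deep inside the shuffle, as in the trace.
    val hadoopConf = spark.sparkContext.hadoopConfiguration
    for (p <- paths) {
      val fs = FileSystem.get(new URI(p), hadoopConf)
      require(fs.exists(new Path(p)), s"Input path does not exist: $p")
    }

    // Roughly the shape of the failing plan: take artist_id from each source,
    // union the two Dataset[String]s and deduplicate (Union -> HashAggregate ->
    // Exchange hashpartitioning in the plan above), then drop down to an RDD.
    val Seq(a, b) = paths.map { p =>
      spark.read.option("sep", "\t").option("header", "true").csv(p)
        .select($"artist_id").as[String]
    }
    val distinctIds = a.union(b).distinct()
    println(s"distinct artist ids: ${distinctIds.rdd.count()}")

    spark.stop()
  }
}

Checking fs.exists up front turns the InvalidInputException buried 54 frames deep into an immediate, readable failure at job start, before any Spark stages are scheduled.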