Advertisement
Guest User

Untitled

a guest
Jun 18th, 2019
154
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 25.01 KB | None | 0 0
  /**
   * Base trait for DataFrame test suites.
   *
   * Wires the shared Spark context set-up/tear-down into the suite lifecycle and
   * adds a helper that turns an inline CSV string into a `DataFrame`.
   */
  trait DataFrameSuiteBase extends TestSuite
    with SharedSparkContext with DataFrameSuiteBaseLike { self: Suite =>

    import spark.implicits._

    /** Runs the parent set-up first, then the SQL-specific set-up. */
    override def beforeAll(): Unit = {
      super.beforeAll()
      super.sqlBeforeAllTestCases()
    }

    /**
     * Runs the parent tear-down, then clears the cached session reference
     * unless the suite is configured to reuse the context.
     */
    override def afterAll(): Unit = {
      super.afterAll()
      if (!reuseContextIfPossible) {
        SparkSessionProvider._sparkSession = null
      }
    }

    /**
     * Builds a `DataFrame` from CSV text whose first non-blank line is the header.
     *
     * @param csv    CSV text; `stripMargin` is applied, so lines may be prefixed with `|`
     * @param schema schema handed to the reader as given; when `null` (the default)
     *               the reader is asked to infer the schema instead
     * @return the parsed `DataFrame`
     */
    def csvStringToDataFrame(csv: String, schema: StructType = null): DataFrame = {
      // Drop blank and whitespace-only lines, e.g. the indentation that precedes
      // the closing triple quote of a multi-line literal.
      val csvLines = csv.stripMargin.lines.toList.filterNot(_.trim.isEmpty)
      val dataset = context.parallelize(csvLines).toDS
      val headerReader = spark.read.option("header", true)
      // `null` is kept as the "no schema" marker for backward compatibility;
      // Option(...) turns it into a plain pattern match.
      val reader = Option(schema) match {
        case Some(explicitSchema) => headerReader.schema(explicitSchema)
        case None                 => headerReader.option("inferSchema", true)
      }
      reader.csv(dataset)
    }

  }
  30.  
  /**
   * Collection of DataFrame helpers bound to a Spark session.
   *
   * @param session session whose implicits are brought into scope for the helpers
   */
  case class Toolbox(session: SparkSession) extends Serializable {

    import session.implicits._

    /**
     * Adds a boolean `<colName>_flag` column marking values outside `bounds`.
     *
     * The flag is `true` when the value of `colName` is not between
     * `bounds.lower` and `bounds.upper`, and `false` otherwise
     * (presumably also for null values, since the condition is then not true — confirm).
     *
     * @param dataframe input data
     * @param colName   name of the column to check
     * @param bounds    lower/upper limits of the accepted range
     * @return the input with the flag column appended, or the input unchanged
     *         when `hasColumn` reports that `colName` is absent
     */
    def checkExtremes(dataframe: DataFrame, colName: String, bounds: Bounds): DataFrame = {
      if (hasColumn(dataframe, colName)) {
        val flagColumnName: String = s"${colName}_flag"
        val outOfBounds = !col(colName).between(bounds.lower, bounds.upper)
        // The flagged frame must be the value of this branch; ending the block
        // on a `val` definition would make the branch evaluate to Unit.
        dataframe.withColumn(flagColumnName, when(outOfBounds, true).otherwise(false))
      } else dataframe
    }
  }
  45.  
  46.  
  /** Behavioural tests for [[Toolbox]]. */
  class ToolboxSpec extends FunSpec with DataFrameSuiteBase
    with DataFrameComparer
    with BeforeAndAfter {

    // Lower-case name on purpose: a member called `Toolbox` would shadow the
    // companion object, so `Toolbox(spark)` would no longer reach the constructor.
    private var toolbox: Toolbox = _

    before {
      toolbox = Toolbox(spark)
    }

    describe("Toolbox") {

      describe("checkExtremes") {
        it("should be checking for extreme values") {
          // NOTE(review): the fixture rows originally used ',' as the decimal
          // separator, which produced 20-21 tokens per row for 13 columns and put
          // the boolean flag far beyond the 14th position. The values below are
          // reconstructed by re-pairing those tokens (e.g. 66,12 -> 66.12);
          // confirm them against the real data.
          val inputCSV =
            """
              |"id","time","code","emi","v","t1","t2","t3","t4","t5","x_acc","y_acc","z_acc"
              |"46","2019-04-01 00:00:57","1",1444,"1",66.12,34.5,29.31,64.56,38.31,67.32,9.64,31.53
              |"46","2019-04-01 00:00:52","1",1515,"1",66,34.5,29.31,64.56,38.31,69.08,24.91,36.7
              |"46","2019-04-01 00:00:46","1",1452,"1",66.12,34.5,29.31,64.5,38.31,66.88,11.12,34.43
              |"47","2019-04-01 00:00:46","1",1452,"1",100.12,34.5,29.31,64.5,38.31,66.88,11.12,34.43
              |"77","2019-04-01 00:00:41","1",1319,"1",66.19,34.5,29.31,64.5,38.31,67.82,8.66,34.79
            """
          val inputColName = "t1"
          val flagColName = s"${inputColName}_flag"
          val expectedCSV =
            s"""
               |"id","time","code","emi","v","t1","t2","t3","t4","t5","x_acc","y_acc","z_acc","$flagColName"
               |"46","2019-04-01 00:00:57","1",1444,"1",66.12,34.5,29.31,64.56,38.31,67.32,9.64,31.53,false
               |"46","2019-04-01 00:00:52","1",1515,"1",66,34.5,29.31,64.56,38.31,69.08,24.91,36.7,false
               |"46","2019-04-01 00:00:46","1",1452,"1",66.12,34.5,29.31,64.5,38.31,66.88,11.12,34.43,false
               |"47","2019-04-01 00:00:46","1",1452,"1",100.12,34.5,29.31,64.5,38.31,66.88,11.12,34.43,true
               |"77","2019-04-01 00:00:41","1",1319,"1",66.19,34.5,29.31,64.5,38.31,67.82,8.66,34.79,false
            """
          val inputSchema = StructType(
            Array(
              StructField("id", StringType, false),
              StructField("time", TimestampType, false),
              StructField("code", StringType, true),
              StructField("emi", IntegerType, true),
              StructField("v", StringType, true),
              StructField("t1", DoubleType, true),
              StructField("t2", DoubleType, true),
              StructField("t3", DoubleType, true),
              StructField("t4", DoubleType, true),
              StructField("t5", DoubleType, true),
              StructField("x_acc", DoubleType, true),
              StructField("y_acc", DoubleType, true),
              StructField("z_acc", DoubleType, true)
            )
          )
          // Same columns as the input plus the flag, so the two schemas cannot drift apart.
          val expectedSchema = inputSchema.add(StructField(flagColName, BooleanType, true))

          val input = csvStringToDataFrame(inputCSV, inputSchema)
          val bounds = Bounds(10, 70)
          val output = toolbox.checkExtremes(input, inputColName, bounds)
          output.show(5)
          output.printSchema()
          val expected = csvStringToDataFrame(expectedCSV, expectedSchema)
          // expected.printSchema()
          // assertSmallDatasetEquality(output, expected) // log 1
          // assertDataFrameEquals(expected, output) // log 2

          // Value-level comparison of the collected rows; schema metadata such as
          // nullability is not part of this check.
          assert(output.collect().toSeq === expected.collect().toSeq)
        }
      }
    }

  }
  130.  
  131. +---------+-------------------+-----------+----+----------------+-----+----+----+----+----+-----+-----+-----+---------------+
  132. | id | time | code | emi| v | t1| t2| t3| t4| t5|x_acc|y_acc|z_acc| t1_flag |
  133. +---------+-------------------+-----------+----+----------------+-----+----+----+----+----+-----+-----+-----+---------------+
  134. | 46|2019-04-01 00:00:57| 1|1444| 1| 66.0|12.0|34.0| 5.0|29.0| 31.0| 64.0| 56.0| false|
  135. | 46|2019-04-01 00:00:52| 1|1515| 1| 66.0|34.0| 5.0|29.0|31.0| 64.0| 56.0| 38.0| false|
  136. | 46|2019-04-01 00:00:46| 1|1452| 1| 66.0|12.0|34.0| 5.0|29.0| 31.0| 64.0| 5.0| false|
  137. | 47|2019-04-01 00:00:46| 1|1452| 1|100.0|12.0|34.0| 5.0|29.0| 31.0| 64.0| 5.0| true|
  138. | 77|2019-04-01 00:00:41| 1|1319| 1| 66.0|19.0|34.0| 5.0|29.0| 31.0| 64.0| 5.0| false|
  139. +---------+-------------------+-----------+----+----------------+-----+----+----+----+----+-----+-----+-----+---------------+
  140.  
  141. root
  142. |-- id: string (nullable = false)
  143. |-- time: timestamp (nullable = false)
  144. |-- code: string (nullable = true)
  145. |-- emi: integer (nullable = true)
  146. |-- v: string (nullable = true)
  147. |-- t1: double (nullable = true)
  148. |-- t2: double (nullable = true)
  149. |-- t3: double (nullable = true)
  150. |-- t4: double (nullable = true)
  151. |-- t5: double (nullable = true)
  152. |-- x_acc: double (nullable = true)
  153. |-- y_acc: double (nullable = true)
  154. |-- z_acc: double (nullable = true)
  155. |-- t1_flag: boolean (nullable = true)
  156.  
  157. [info] ToolboxSpec:
  158. [info] Toolbox
  159. [info] checkExtremes
  160. [info] - should be checking for outlier *** FAILED ***
  161. [info] java.lang.NullPointerException:
  162. [info] at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.createExternalRow_0_0$(Unknown Source)
  163. [info] at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
  164. [info] at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$collectFromPlan$1.apply(Dataset.scala:3276)
  165. [info] at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$collectFromPlan$1.apply(Dataset.scala:3273)
  166. [info] at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  167. [info] at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
  168. [info] at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
  169. [info] at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
  170. [info] at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
  171. [info] at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
  172. [info] ...
  173. [info] ScalaTest
  174. [info] Run completed in 13 seconds, 544 milliseconds.
  175. [info] Total number of tests run: 130
  176. [info] Suites: completed 8, aborted 0
  177. [info] Tests: succeeded 129, failed 1, canceled 0, ignored 0, pending 0
  178. [info] *** 1 TEST FAILED ***
  179. [error] Failed: Total 130, Failed 1, Errors 0, Passed 129
  180. [error] Failed tests:
  181. [error] com.test.ToolboxSpec
  182. [error] (Test / test) sbt.TestsFailedException: Tests unsuccessful
  183.  
  184. 19/05/22 16:53:14 WARN SparkContext: Using an existing SparkContext; some configuration may not take effect.
  185. [info] Toolbox
  186. [info] checkExtremes
  187. +---------+-------------------+-----------+----+----------------+-----+----+----+----+----+-----+-----+-----+---------------+
  188. | id | time | code | emi| v | t1| t2| t3| t4| t5|x_acc|y_acc|z_acc| t1 |
  189. +---------+-------------------+-----------+----+----------------+-----+----+----+----+----+-----+-----+-----+---------------+
  190. | 46|2019-04-01 00:00:57| 1|1444| 1| 66.0|12.0|34.0| 5.0|29.0| 31.0| 64.0| 56.0| false|
  191. | 46|2019-04-01 00:00:52| 1|1515| 1| 66.0|34.0| 5.0|29.0|31.0| 64.0| 56.0| 38.0| false|
  192. | 46|2019-04-01 00:00:46| 1|1452| 1| 66.0|12.0|34.0| 5.0|29.0| 31.0| 64.0| 5.0| false|
  193. | 47|2019-04-01 00:00:46| 1|1452| 1|100.0|12.0|34.0| 5.0|29.0| 31.0| 64.0| 5.0| true|
  194. | 77|2019-04-01 00:00:41| 1|1319| 1| 66.0|19.0|34.0| 5.0|29.0| 31.0| 64.0| 5.0| false|
  195. +---------+-------------------+-----------+----+----------------+-----+----+----+----+----+-----+-----+-----+---------------+
  196.  
  197. root
  198. |-- id: string (nullable = false)
  199. |-- time: timestamp (nullable = false)
  200. |-- code: string (nullable = true)
  201. |-- emi: integer (nullable = true)
  202. |-- v: string (nullable = true)
  203. |-- t1: double (nullable = true)
  204. |-- t2: double (nullable = true)
  205. |-- t3: double (nullable = true)
  206. |-- t4: double (nullable = true)
  207. |-- t5: double (nullable = true)
  208. |-- x_acc: double (nullable = true)
  209. |-- y_acc: double (nullable = true)
  210. |-- z_acc: double (nullable = true)
  211. |-- t1: boolean (nullable = true)
  212.  
  213. 19/05/22 16:53:15 WARN BlockManager: Putting block rdd_37_4 failed due to exception java.lang.NullPointerException.
  214. 19/05/22 16:53:15 WARN BlockManager: Putting block rdd_37_3 failed due to exception java.lang.NullPointerException.
  215. 19/05/22 16:53:15 WARN BlockManager: Putting block rdd_37_5 failed due to exception java.lang.NullPointerException.
  216. 19/05/22 16:53:15 WARN BlockManager: Putting block rdd_37_2 failed due to exception java.lang.NullPointerException.
  217. 19/05/22 16:53:15 WARN BlockManager: Putting block rdd_37_6 failed due to exception java.lang.NullPointerException.
  218. 19/05/22 16:53:15 WARN BlockManager: Block rdd_37_4 could not be removed as it was not found on disk or in memory
  219. 19/05/22 16:53:15 WARN BlockManager: Block rdd_37_5 could not be removed as it was not found on disk or in memory
  220. 19/05/22 16:53:15 WARN BlockManager: Block rdd_37_6 could not be removed as it was not found on disk or in memory
  221. 19/05/22 16:53:15 WARN BlockManager: Block rdd_37_2 could not be removed as it was not found on disk or in memory
  222. 19/05/22 16:53:15 WARN BlockManager: Block rdd_37_3 could not be removed as it was not found on disk or in memory
  223. 19/05/22 16:53:15 ERROR Executor: Exception in task 5.0 in stage 7.0 (TID 23)
  224. java.lang.NullPointerException
  225. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.createExternalRow_0_0$(Unknown Source)
  226. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
  227. at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
  228. at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
  229. at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:217)
  230. at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1092)
  231. at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
  232. at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
  233. at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
  234. at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
  235. at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
  236. at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
  237. at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
  238. at org.apache.spark.scheduler.Task.run(Task.scala:109)
  239. at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
  240. at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  241. at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  242. at java.lang.Thread.run(Thread.java:748)
  243. 19/05/22 16:53:15 ERROR Executor: Exception in task 2.0 in stage 7.0 (TID 20)
  244. java.lang.NullPointerException
  245. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.createExternalRow_0_0$(Unknown Source)
  246. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
  247. at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
  248. at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
  249. at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:217)
  250. at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1092)
  251. at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
  252. at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
  253. at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
  254. at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
  255. at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
  256. at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
  257. at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
  258. at org.apache.spark.scheduler.Task.run(Task.scala:109)
  259. at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
  260. at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  261. at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  262. at java.lang.Thread.run(Thread.java:748)
  263. 19/05/22 16:53:15 ERROR Executor: Exception in task 6.0 in stage 7.0 (TID 24)
  264. java.lang.NullPointerException
  265. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.createExternalRow_0_0$(Unknown Source)
  266. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
  267. at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
  268. at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
  269. at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:217)
  270. at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1092)
  271. at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
  272. at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
  273. at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
  274. at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
  275. at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
  276. at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
  277. at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
  278. at org.apache.spark.scheduler.Task.run(Task.scala:109)
  279. at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
  280. at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  281. at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  282. at java.lang.Thread.run(Thread.java:748)
  283. 19/05/22 16:53:15 ERROR Executor: Exception in task 4.0 in stage 7.0 (TID 22)
  284. java.lang.NullPointerException
  285. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.createExternalRow_0_0$(Unknown Source)
  286. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
  287. at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
  288. at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
  289. at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:217)
  290. at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1092)
  291. at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
  292. at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
  293. at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
  294. at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
  295. at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
  296. at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
  297. at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
  298. at org.apache.spark.scheduler.Task.run(Task.scala:109)
  299. at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
  300. at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  301. at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  302. at java.lang.Thread.run(Thread.java:748)
  303. 19/05/22 16:53:15 ERROR Executor: Exception in task 3.0 in stage 7.0 (TID 21)
  304. java.lang.NullPointerException
  305. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.createExternalRow_0_0$(Unknown Source)
  306. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
  307. at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
  308. at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
  309. at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:217)
  310. at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1092)
  311. at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
  312. at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
  313. at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
  314. at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
  315. at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
  316. at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
  317. at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
  318. at org.apache.spark.scheduler.Task.run(Task.scala:109)
  319. at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
  320. at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  321. at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  322. at java.lang.Thread.run(Thread.java:748)
  323. 19/05/22 16:53:15 WARN TaskSetManager: Lost task 5.0 in stage 7.0 (TID 23, localhost, executor driver): java.lang.NullPointerException
  324. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.createExternalRow_0_0$(Unknown Source)
  325. at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
  326. at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
  327. at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
  328. at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:217)
  329. at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1092)
  330. at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
  331. at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
  332. at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
  333. at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
  334. at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
  335. at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
  336. at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
  337. at org.apache.spark.scheduler.Task.run(Task.scala:109)
  338. at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
  339. at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  340. at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  341. at java.lang.Thread.run(Thread.java:748)
  342.  
  343. 19/05/22 16:53:15 ERROR TaskSetManager: Task 5 in stage 7.0 failed 1 times; aborting job
  344. [info] - should be checking for outlier *** FAILED ***
  345. [info] org.apache.spark.SparkException: Job aborted due to stage failure: Task 5 in stage 7.0 failed 1 times, most recent failure: Lost task 5.0 in stage 7.0 (TID 23, localhost, executor driver): java.lang.NullPointerException
  346. [info] at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.createExternalRow_0_0$(Unknown Source)
  347. [info] at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
  348. [info] at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
  349. [info] at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
  350. [info] at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:217)
  351. [info] at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1092)
  352. [info] at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
  353. [info] at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
  354. [info] at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
  355. [info] at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
  356. [info] at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
  357. [info] at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
  358. [info] at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
  359. [info] at org.apache.spark.scheduler.Task.run(Task.scala:109)
  360. [info] at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
  361. [info] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
  362. [info] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
  363. [info] at java.lang.Thread.run(Thread.java:748)
  364. [info]
  365. [info] Driver stacktrace:
  366. [info] at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1602)
  367. [info] at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1590)
  368. [info] at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1589)
  369. [info] at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
  370. [info] at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
  371. [info] at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1589)
  372. [info] at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
  373. [info] at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
  374. [info] at scala.Option.foreach(Option.scala:257)
  375. [info] at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
  376. [info] ...
  377. [info] Cause: java.lang.NullPointerException:
  378. [info] at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.createExternalRow_0_0$(Unknown Source)
  379. [info] at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
  380. [info] at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
  381. [info] at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
  382. [info] at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:217)
  383. [info] at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1092)
  384. [info] at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
  385. [info] at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
  386. [info] at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
  387. [info] at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
  388. [info] ...
  389. Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=2048M; support was removed in 8.0
  390. [info] ScalaTest
  391. [info] Run completed in 17 seconds, 965 milliseconds.
  392. [info] Total number of tests run: 130
  393. [info] Suites: completed 8, aborted 0
  394. [info] Tests: succeeded 129, failed 1, canceled 0, ignored 0, pending 0
  395. [info] *** 1 TEST FAILED ***
  396. [error] Failed: Total 130, Failed 1, Errors 0, Passed 129
  397. [error] Failed tests:
  398. [error] com.test.ToolboxSpec
  399. [error] (Test / test) sbt.TestsFailedException: Tests unsuccessful
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement