Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
trait DataFrameSuiteBase extends TestSuite
    with SharedSparkContext with DataFrameSuiteBaseLike { self: Suite =>

  import spark.implicits._

  /** Initialise the shared SQL test fixtures before any test in the suite runs. */
  override def beforeAll(): Unit = {
    super.beforeAll()
    super.sqlBeforeAllTestCases()
  }

  /**
   * Tear down after the suite. The shared SparkSession is released only when
   * context reuse is disabled, so consecutive suites can share one session.
   */
  override def afterAll(): Unit = {
    super.afterAll()
    if (!reuseContextIfPossible) {
      SparkSessionProvider._sparkSession = null
    }
  }

  /**
   * Parse a multi-line CSV string into a DataFrame. The first non-empty line
   * is treated as the header row.
   *
   * @param csv    raw CSV text; margins are stripped and blank lines removed
   * @param schema explicit schema to apply; when null, the schema is inferred
   *               from the data instead
   * @return the parsed DataFrame
   */
  def csvStringToDataFrame(csv: String, schema: StructType = null): DataFrame = {
    // `linesIterator` avoids the ambiguity with java.lang.String.lines (JDK 11+),
    // which returns a Java Stream and breaks `toList.filterNot`.
    val csvList = csv.stripMargin.linesIterator.toList.filterNot(_.isEmpty)
    val dataset = context.parallelize(csvList).toDS
    val readCSV = spark.read.option("header", true)
    // Plain Option-based null check: `!==` is the Spark Column / scalactic
    // operator and is misleading (and may not compile) for a reference check.
    val dataFrameReader =
      Option(schema).fold(readCSV.option("inferSchema", true))(readCSV.schema)
    dataFrameReader.csv(dataset)
  }
}
case class Toolbox(session: SparkSession) extends Serializable {

  import session.implicits._

  /**
   * Flag rows whose `colName` value lies outside the closed interval
   * [bounds.lower, bounds.upper].
   *
   * Appends a boolean column named s"${colName}_flag": true when the value is
   * out of bounds, false otherwise. When `colName` is not present in the
   * DataFrame, the input is returned unchanged.
   *
   * @param dataframe input data
   * @param colName   column to range-check
   * @param bounds    inclusive lower/upper bounds
   * @return the input DataFrame with the flag column appended (or unchanged)
   */
  def checkExtremes(dataframe: DataFrame, colName: String, bounds: Bounds): DataFrame = {
    if (hasColumn(dataframe, colName)) {
      val flagColumnName: String = s"${colName}_flag"
      // BUG FIX: previously the flagged DataFrame was bound to an unused local
      // (`val outliers = ...`) and never returned, so the then-branch evaluated
      // to Unit instead of the flagged DataFrame. Return the expression directly.
      dataframe.withColumn(
        flagColumnName,
        when(!col(colName).between(bounds.lower, bounds.upper), true).otherwise(false))
    } else dataframe
  }
}
// NOTE(review): this spec is in a mid-debugging state — the equality assertions
// are commented out and `show`/`printSchema` probes are left in. The attached
// logs show the run dying with a NullPointerException in
// SpecificSafeProjection.createExternalRow; see the hedged notes below for the
// likely cause. Confirm before re-enabling the assertions.
class ToolboxSpec extends FunSpec with DataFrameSuiteBase
with DataFrameComparer
with BeforeAndAfter {
// NOTE(review): variable shares its name with the `Toolbox` class, shadowing
// the companion inside this scope — consider renaming (e.g. `toolbox`).
var Toolbox: Toolbox = _
// Fresh Toolbox bound to the shared test SparkSession before each test.
before {
Toolbox = Toolbox(spark)
}
describe("Toolbox") {
describe("checkExtremes") {
it("should be checking for extreme values") {
// NOTE(review): the header declares 13 columns but each data row carries
// ~20 comma-separated values — the row/schema widths disagree. Verify the
// fixture data; malformed rows parsed against the 13-column schema yield
// nulls, which would explain the NPE during row materialisation.
val inputCSV =
"""
|"id","time","code","emi","v","t1","t2","t3","t4","t5","x_acc","y_acc","z_acc"
|"46","2019-04-01 00:00:57","1",1444,"1",66,12,34,5,29,31,64,56,38,31,67,32,9,64,31,53
|"46","2019-04-01 00:00:52","1",1515,"1",66,34,5,29,31,64,56,38,31,69,08,24,91,36,7
|"46","2019-04-01 00:00:46","1",1452,"1",66,12,34,5,29,31,64,5,38,31,66,88,11,12,34,43
|"47","2019-04-01 00:00:46","1",1452,"1",100,12,34,5,29,31,64,5,38,31,66,88,11,12,34,43
|"77","2019-04-01 00:00:41","1",1319,"1",66,19,34,5,29,31,64,5,38,31,67,82,8,66,34,79
"""
val inputColName = "t1"
val flagColName = s"${inputColName}_flag"
// Expected output: same rows with the boolean flag column appended; only
// the row with t1 = 100 (outside [10, 70]) is flagged true.
val expectedCSV =
s"""
|"id","time","code","emi","v","t1","t2","t3","t4","t5","x_acc","y_acc","z_acc","$flagColName"
|"46","2019-04-01 00:00:57","1",1444,"1",66,12,34,5,29,31,64,56,38,31,67,32,9,64,31,53,false
|"46","2019-04-01 00:00:52","1",1515,"1",66,34,5,29,31,64,56,38,31,69,08,24,91,36,7,false
|"46","2019-04-01 00:00:46","1",1452,"1",66,12,34,5,29,31,64,5,38,31,66,88,11,12,34,43,false
|"47","2019-04-01 00:00:46","1",1452,"1",100,12,34,5,29,31,64,5,38,31,66,88,11,12,34,43,true
|"77","2019-04-01 00:00:41","1",1319,"1",66,19,34,5,29,31,64,5,38,31,67,82,8,66,34,79,false
"""
// NOTE(review): `id` and `time` are declared nullable = false, but Spark's
// CSV reader produces nullable columns; forcing a non-nullable schema over
// data containing nulls is a known source of NPEs in
// SpecificSafeProjection.createExternalRow (matches the logged stack
// trace). Consider nullable = true here — TODO confirm.
val inputSchema = StructType(
Array(
StructField("id", StringType, false),
StructField("time", TimestampType, false),
StructField("code", StringType, true),
StructField("emi", IntegerType, true),
StructField("v", StringType, true),
StructField("t1", DoubleType, true),
StructField("t2", DoubleType, true),
StructField("t3", DoubleType, true),
StructField("t4", DoubleType, true),
StructField("t5", DoubleType, true),
StructField("x_acc", DoubleType, true),
StructField("y_acc", DoubleType, true),
StructField("z_acc", DoubleType, true)
)
)
// Same schema as the input plus the appended boolean flag column.
val expectedSchema = StructType(
Array(
StructField("id", StringType, false),
StructField("time", TimestampType, false),
StructField("code", StringType, true),
StructField("emi", IntegerType, true),
StructField("v", StringType, true),
StructField("t1", DoubleType, true),
StructField("t2", DoubleType, true),
StructField("t3", DoubleType, true),
StructField("t4", DoubleType, true),
StructField("t5", DoubleType, true),
StructField("x_acc", DoubleType, true),
StructField("y_acc", DoubleType, true),
StructField("z_acc", DoubleType, true),
StructField(flagColName, BooleanType, true)
)
)
val input = csvStringToDataFrame(inputCSV, inputSchema)
val bounds = Bounds(10, 70)
val output = Toolbox.checkExtremes(input, inputColName, bounds)
// Debug probes — remove once the assertions below are re-enabled.
output.show(5)
output.printSchema()
val expected = csvStringToDataFrame(expectedCSV, expectedSchema)
// expected.printSchema()
// assertSmallDatasetEquality(output, expected) // log 1
// assertDataFrameEquals(expected, output) // log 2
}
}
}
}
- +---------+-------------------+-----------+----+----------------+-----+----+----+----+----+-----+-----+-----+---------------+
- | id | time | code | emi| v | t1| t2| t3| t4| t5|x_acc|y_acc|z_acc| t1_flag |
- +---------+-------------------+-----------+----+----------------+-----+----+----+----+----+-----+-----+-----+---------------+
- | 46|2019-04-01 00:00:57| 1|1444| 1| 66.0|12.0|34.0| 5.0|29.0| 31.0| 64.0| 56.0| false|
- | 46|2019-04-01 00:00:52| 1|1515| 1| 66.0|34.0| 5.0|29.0|31.0| 64.0| 56.0| 38.0| false|
- | 46|2019-04-01 00:00:46| 1|1452| 1| 66.0|12.0|34.0| 5.0|29.0| 31.0| 64.0| 5.0| false|
- | 47|2019-04-01 00:00:46| 1|1452| 1|100.0|12.0|34.0| 5.0|29.0| 31.0| 64.0| 5.0| true|
- | 77|2019-04-01 00:00:41| 1|1319| 1| 66.0|19.0|34.0| 5.0|29.0| 31.0| 64.0| 5.0| false|
- +---------+-------------------+-----------+----+----------------+-----+----+----+----+----+-----+-----+-----+---------------+
- root
- |-- id: string (nullable = false)
- |-- time: timestamp (nullable = false)
- |-- code: string (nullable = true)
- |-- emi: integer (nullable = true)
- |-- v: string (nullable = true)
- |-- t1: double (nullable = true)
- |-- t2: double (nullable = true)
- |-- t3: double (nullable = true)
- |-- t4: double (nullable = true)
- |-- t5: double (nullable = true)
- |-- x_acc: double (nullable = true)
- |-- y_acc: double (nullable = true)
- |-- z_acc: double (nullable = true)
- |-- t1_flag: boolean (nullable = true)
- [info] ToolboxSpec:
- [info] Toolbox
- [info] checkExtremes
- [info] - should be checking for outlier *** FAILED ***
- [info] java.lang.NullPointerException:
- [info] at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.createExternalRow_0_0$(Unknown Source)
- [info] at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
- [info] at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$collectFromPlan$1.apply(Dataset.scala:3276)
- [info] at org.apache.spark.sql.Dataset$$anonfun$org$apache$spark$sql$Dataset$$collectFromPlan$1.apply(Dataset.scala:3273)
- [info] at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
- [info] at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
- [info] at scala.collection.IndexedSeqOptimized$class.foreach(IndexedSeqOptimized.scala:33)
- [info] at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:186)
- [info] at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
- [info] at scala.collection.mutable.ArrayOps$ofRef.map(ArrayOps.scala:186)
- [info] ...
- [info] ScalaTest
- [info] Run completed in 13 seconds, 544 milliseconds.
- [info] Total number of tests run: 130
- [info] Suites: completed 8, aborted 0
- [info] Tests: succeeded 129, failed 1, canceled 0, ignored 0, pending 0
- [info] *** 1 TEST FAILED ***
- [error] Failed: Total 130, Failed 1, Errors 0, Passed 129
- [error] Failed tests:
- [error] com.test.ToolboxSpec
- [error] (Test / test) sbt.TestsFailedException: Tests unsuccessful
- 19/05/22 16:53:14 WARN SparkContext: Using an existing SparkContext; some configuration may not take effect.
- [info] Toolbox
- [info] checkExtremes
- +---------+-------------------+-----------+----+----------------+-----+----+----+----+----+-----+-----+-----+---------------+
- | id | time | code | emi| v | t1| t2| t3| t4| t5|x_acc|y_acc|z_acc| t1 |
- +---------+-------------------+-----------+----+----------------+-----+----+----+----+----+-----+-----+-----+---------------+
- | 46|2019-04-01 00:00:57| 1|1444| 1| 66.0|12.0|34.0| 5.0|29.0| 31.0| 64.0| 56.0| false|
- | 46|2019-04-01 00:00:52| 1|1515| 1| 66.0|34.0| 5.0|29.0|31.0| 64.0| 56.0| 38.0| false|
- | 46|2019-04-01 00:00:46| 1|1452| 1| 66.0|12.0|34.0| 5.0|29.0| 31.0| 64.0| 5.0| false|
- | 47|2019-04-01 00:00:46| 1|1452| 1|100.0|12.0|34.0| 5.0|29.0| 31.0| 64.0| 5.0| true|
- | 77|2019-04-01 00:00:41| 1|1319| 1| 66.0|19.0|34.0| 5.0|29.0| 31.0| 64.0| 5.0| false|
- +---------+-------------------+-----------+----+----------------+-----+----+----+----+----+-----+-----+-----+---------------+
- root
- |-- id: string (nullable = false)
- |-- time: timestamp (nullable = false)
- |-- code: string (nullable = true)
- |-- emi: integer (nullable = true)
- |-- v: string (nullable = true)
- |-- t1: double (nullable = true)
- |-- t2: double (nullable = true)
- |-- t3: double (nullable = true)
- |-- t4: double (nullable = true)
- |-- t5: double (nullable = true)
- |-- x_acc: double (nullable = true)
- |-- y_acc: double (nullable = true)
- |-- z_acc: double (nullable = true)
- |-- t1: boolean (nullable = true)
- 19/05/22 16:53:15 WARN BlockManager: Putting block rdd_37_4 failed due to exception java.lang.NullPointerException.
- 19/05/22 16:53:15 WARN BlockManager: Putting block rdd_37_3 failed due to exception java.lang.NullPointerException.
- 19/05/22 16:53:15 WARN BlockManager: Putting block rdd_37_5 failed due to exception java.lang.NullPointerException.
- 19/05/22 16:53:15 WARN BlockManager: Putting block rdd_37_2 failed due to exception java.lang.NullPointerException.
- 19/05/22 16:53:15 WARN BlockManager: Putting block rdd_37_6 failed due to exception java.lang.NullPointerException.
- 19/05/22 16:53:15 WARN BlockManager: Block rdd_37_4 could not be removed as it was not found on disk or in memory
- 19/05/22 16:53:15 WARN BlockManager: Block rdd_37_5 could not be removed as it was not found on disk or in memory
- 19/05/22 16:53:15 WARN BlockManager: Block rdd_37_6 could not be removed as it was not found on disk or in memory
- 19/05/22 16:53:15 WARN BlockManager: Block rdd_37_2 could not be removed as it was not found on disk or in memory
- 19/05/22 16:53:15 WARN BlockManager: Block rdd_37_3 could not be removed as it was not found on disk or in memory
- 19/05/22 16:53:15 ERROR Executor: Exception in task 5.0 in stage 7.0 (TID 23)
- java.lang.NullPointerException
- at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.createExternalRow_0_0$(Unknown Source)
- at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
- at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
- at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
- at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:217)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1092)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
- at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
- at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
- at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
- at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
- at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
- at org.apache.spark.scheduler.Task.run(Task.scala:109)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
- at java.lang.Thread.run(Thread.java:748)
- 19/05/22 16:53:15 ERROR Executor: Exception in task 2.0 in stage 7.0 (TID 20)
- java.lang.NullPointerException
- at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.createExternalRow_0_0$(Unknown Source)
- at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
- at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
- at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
- at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:217)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1092)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
- at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
- at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
- at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
- at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
- at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
- at org.apache.spark.scheduler.Task.run(Task.scala:109)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
- at java.lang.Thread.run(Thread.java:748)
- 19/05/22 16:53:15 ERROR Executor: Exception in task 6.0 in stage 7.0 (TID 24)
- java.lang.NullPointerException
- at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.createExternalRow_0_0$(Unknown Source)
- at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
- at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
- at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
- at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:217)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1092)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
- at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
- at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
- at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
- at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
- at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
- at org.apache.spark.scheduler.Task.run(Task.scala:109)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
- at java.lang.Thread.run(Thread.java:748)
- 19/05/22 16:53:15 ERROR Executor: Exception in task 4.0 in stage 7.0 (TID 22)
- java.lang.NullPointerException
- at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.createExternalRow_0_0$(Unknown Source)
- at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
- at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
- at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
- at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:217)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1092)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
- at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
- at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
- at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
- at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
- at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
- at org.apache.spark.scheduler.Task.run(Task.scala:109)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
- at java.lang.Thread.run(Thread.java:748)
- 19/05/22 16:53:15 ERROR Executor: Exception in task 3.0 in stage 7.0 (TID 21)
- java.lang.NullPointerException
- at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.createExternalRow_0_0$(Unknown Source)
- at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
- at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
- at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
- at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:217)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1092)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
- at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
- at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
- at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
- at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
- at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
- at org.apache.spark.scheduler.Task.run(Task.scala:109)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
- at java.lang.Thread.run(Thread.java:748)
- 19/05/22 16:53:15 WARN TaskSetManager: Lost task 5.0 in stage 7.0 (TID 23, localhost, executor driver): java.lang.NullPointerException
- at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.createExternalRow_0_0$(Unknown Source)
- at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
- at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
- at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
- at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:217)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1092)
- at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
- at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
- at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
- at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
- at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
- at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
- at org.apache.spark.scheduler.Task.run(Task.scala:109)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
- at java.lang.Thread.run(Thread.java:748)
- 19/05/22 16:53:15 ERROR TaskSetManager: Task 5 in stage 7.0 failed 1 times; aborting job
- [info] - should be checking for outlier *** FAILED ***
- [info] org.apache.spark.SparkException: Job aborted due to stage failure: Task 5 in stage 7.0 failed 1 times, most recent failure: Lost task 5.0 in stage 7.0 (TID 23, localhost, executor driver): java.lang.NullPointerException
- [info] at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.createExternalRow_0_0$(Unknown Source)
- [info] at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
- [info] at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
- [info] at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
- [info] at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:217)
- [info] at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1092)
- [info] at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
- [info] at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
- [info] at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
- [info] at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
- [info] at org.apache.spark.rdd.RDD.getOrCompute(RDD.scala:335)
- [info] at org.apache.spark.rdd.RDD.iterator(RDD.scala:286)
- [info] at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
- [info] at org.apache.spark.scheduler.Task.run(Task.scala:109)
- [info] at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:345)
- [info] at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
- [info] at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
- [info] at java.lang.Thread.run(Thread.java:748)
- [info]
- [info] Driver stacktrace:
- [info] at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1602)
- [info] at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1590)
- [info] at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1589)
- [info] at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
- [info] at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
- [info] at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1589)
- [info] at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
- [info] at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:831)
- [info] at scala.Option.foreach(Option.scala:257)
- [info] at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:831)
- [info] ...
- [info] Cause: java.lang.NullPointerException:
- [info] at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.createExternalRow_0_0$(Unknown Source)
- [info] at org.apache.spark.sql.catalyst.expressions.GeneratedClass$SpecificSafeProjection.apply(Unknown Source)
- [info] at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
- [info] at scala.collection.Iterator$$anon$11.next(Iterator.scala:410)
- [info] at org.apache.spark.storage.memory.MemoryStore.putIteratorAsValues(MemoryStore.scala:217)
- [info] at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1092)
- [info] at org.apache.spark.storage.BlockManager$$anonfun$doPutIterator$1.apply(BlockManager.scala:1083)
- [info] at org.apache.spark.storage.BlockManager.doPut(BlockManager.scala:1018)
- [info] at org.apache.spark.storage.BlockManager.doPutIterator(BlockManager.scala:1083)
- [info] at org.apache.spark.storage.BlockManager.getOrElseUpdate(BlockManager.scala:809)
- [info] ...
- Java HotSpot(TM) 64-Bit Server VM warning: ignoring option MaxPermSize=2048M; support was removed in 8.0
- [info] ScalaTest
- [info] Run completed in 17 seconds, 965 milliseconds.
- [info] Total number of tests run: 130
- [info] Suites: completed 8, aborted 0
- [info] Tests: succeeded 129, failed 1, canceled 0, ignored 0, pending 0
- [info] *** 1 TEST FAILED ***
- [error] Failed: Total 130, Failed 1, Errors 0, Passed 129
- [error] Failed tests:
- [error] com.test.ToolboxSpec
- [error] (Test / test) sbt.TestsFailedException: Tests unsuccessful
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement