Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- org.apache.spark.SparkException: Job aborted.
- at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:147)
- at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:121)
- at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply(FileFormatWriter.scala:121)
- at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:57)
- at org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:121)
- at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:101)
- at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult$lzycompute(commands.scala:58)
- at org.apache.spark.sql.execution.command.ExecutedCommandExec.sideEffectResult(commands.scala:56)
- at org.apache.spark.sql.execution.command.ExecutedCommandExec.doExecute(commands.scala:74)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$execute$1.apply(SparkPlan.scala:114)
- at org.apache.spark.sql.execution.SparkPlan$$anonfun$executeQuery$1.apply(SparkPlan.scala:135)
- at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
- at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:132)
- at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:113)
- at org.apache.spark.sql.execution.QueryExecution.toRdd$lzycompute(QueryExecution.scala:87)
- at org.apache.spark.sql.execution.QueryExecution.toRdd(QueryExecution.scala:87)
- at org.apache.spark.sql.execution.datasources.DataSource.write(DataSource.scala:492)
- at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:215)
- at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:198)
- at org.apache.spark.sql.DataFrameWriter.csv(DataFrameWriter.scala:579)
- ... 46 elided
- Caused by: org.apache.spark.SparkException: Job aborted due to stage failure: Task 116 in stage 1.0 failed 1 times, most recent failure: Lost task 116.0 in stage 1.0 (TID 234, localhost, executor driver): org.postgresql.util.PSQLException: The connection attempt failed.
- at org.postgresql.core.v3.ConnectionFactoryImpl.openConnectionImpl(ConnectionFactoryImpl.java:275)
- at org.postgresql.core.ConnectionFactory.openConnection(ConnectionFactory.java:55)
- at org.postgresql.jdbc.PgConnection.<init>(PgConnection.java:219)
- at org.postgresql.Driver.makeConnection(Driver.java:407)
- at org.postgresql.Driver.connect(Driver.java:275)
- at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory$1.apply(JdbcUtils.scala:59)
- at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory$1.apply(JdbcUtils.scala:50)
- at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD.compute(JDBCRDD.scala:286)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
- at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
- at org.apache.spark.scheduler.Task.run(Task.scala:99)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
- at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
- at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
- at java.lang.Thread.run(Thread.java:745)
- Caused by: java.net.SocketTimeoutException: connect timed out
- at java.net.PlainSocketImpl.socketConnect(Native Method)
- at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)
- at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)
- at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)
- at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
- at java.net.Socket.connect(Socket.java:589)
- at org.postgresql.core.PGStream.<init>(PGStream.java:64)
- at org.postgresql.core.v3.ConnectionFactoryImpl.openConnectionImpl(ConnectionFactoryImpl.java:147)
- ... 18 more
- Driver stacktrace:
- at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1435)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1423)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1422)
- at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
- at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
- at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1422)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
- at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:802)
- at scala.Option.foreach(Option.scala:257)
- at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:802)
- at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1650)
- at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1605)
- at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1594)
- at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
- at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:628)
- at org.apache.spark.SparkContext.runJob(SparkContext.scala:1918)
- at org.apache.spark.SparkContext.runJob(SparkContext.scala:1931)
- at org.apache.spark.SparkContext.runJob(SparkContext.scala:1951)
- at org.apache.spark.sql.execution.datasources.FileFormatWriter$$anonfun$write$1.apply$mcV$sp(FileFormatWriter.scala:127)
- ... 66 more
- Caused by: org.postgresql.util.PSQLException: The connection attempt failed.
- at org.postgresql.core.v3.ConnectionFactoryImpl.openConnectionImpl(ConnectionFactoryImpl.java:275)
- at org.postgresql.core.ConnectionFactory.openConnection(ConnectionFactory.java:55)
- at org.postgresql.jdbc.PgConnection.<init>(PgConnection.java:219)
- at org.postgresql.Driver.makeConnection(Driver.java:407)
- at org.postgresql.Driver.connect(Driver.java:275)
- at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory$1.apply(JdbcUtils.scala:59)
- at org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils$$anonfun$createConnectionFactory$1.apply(JdbcUtils.scala:50)
- at org.apache.spark.sql.execution.datasources.jdbc.JDBCRDD.compute(JDBCRDD.scala:286)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
- at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
- at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:323)
- at org.apache.spark.rdd.RDD.iterator(RDD.scala:287)
- at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:87)
- at org.apache.spark.scheduler.Task.run(Task.scala:99)
- at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:282)
- ... 3 more
- Caused by: java.net.SocketTimeoutException: connect timed out
- at java.net.PlainSocketImpl.socketConnect(Native Method)
- at java.net.AbstractPlainSocketImpl.doConnect(AbstractPlainSocketImpl.java:350)
- at java.net.AbstractPlainSocketImpl.connectToAddress(AbstractPlainSocketImpl.java:206)
- at java.net.AbstractPlainSocketImpl.connect(AbstractPlainSocketImpl.java:188)
- at java.net.SocksSocketImpl.connect(SocksSocketImpl.java:392)
- at java.net.Socket.connect(Socket.java:589)
- at org.postgresql.core.PGStream.<init>(PGStream.java:64)
- at org.postgresql.core.v3.ConnectionFactoryImpl.openConnectionImpl(ConnectionFactoryImpl.java:147)
- ... 18 more
// Load the Postgres "media" table in parallel over JDBC, cache it, and print the row count.
// (spark-shell script; assumes an active `spark` SparkSession in scope.)
import java.sql.DriverManager
import java.util.Properties

val jdbcUrl = s"jdbc:postgresql://XXXX:5432/XXXX"

// Credentials and driver-level settings only; partitioning parameters are
// passed explicitly to the read below instead of being smuggled in here.
val connectionProperties = new Properties()
connectionProperties.put("user", "XXXX")
connectionProperties.put("password", "XXXX")
// Fail fast instead of hanging on a bad connect: the failing run died with
// java.net.SocketTimeoutException ("connect timed out") — see the trace above.
// connectTimeout is a PostgreSQL JDBC driver option, in seconds.
connectionProperties.put("connectTimeout", "10")

// Push the query down to Postgres as a derived table.
val pushdown_query = "(select * from media) media_alias"

// NOTE(review): the failing run used numPartitions = 200. Each partition opens
// its own JDBC connection, so 200 concurrent connections were attempted against
// a Postgres whose default max_connections is 100 — the likely cause of the
// connection timeouts. Start with a modest parallelism and raise it only within
// the server's connection budget.
val numPartitions = 16
val mediaOccDF = spark.read
  .jdbc(
    jdbcUrl,
    pushdown_query,
    "channel",           // partition column
    0L,                  // lowerBound
    1000L,               // upperBound
    numPartitions,
    connectionProperties
  )
  .cache()               // side-effecting: keep the parens per convention

println(mediaOccDF.count())
Add Comment
Please, Sign In to add comment