CREATE TABLE t1 (a VARCHAR, b INTEGER PRIMARY KEY);

UPSERT INTO t1 VALUES ('test1', 1);

// Insert a new row with a third (dynamic) column 'c'
UPSERT INTO t1 (a, b, c INTEGER) VALUES ('test2', 2, 3);

SELECT * FROM t1;
+--------+----+
|   A    | B  |
+--------+----+
| test1  | 1  |
| test2  | 2  |
+--------+----+

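// Note: 'c' above was only a dynamic column, so a plain SELECT * does not return it.
// A sketch of how it could be read back before altering the table (dynamic columns
// must be re-declared at query time; 'c INTEGER' here is the same ad-hoc definition
// used in the UPSERT, not part of the table schema):
SELECT a, b, c FROM t1 (c INTEGER);
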
// Add the new column
ALTER TABLE t1 ADD IF NOT EXISTS c INTEGER;

SELECT * FROM t1;
+--------+----+-------+
|   A    | B  |   C   |
+--------+----+-------+
| test1  | 1  | null  |
| test2  | 2  | 3     |
+--------+----+-------+

val readDF = sqlContext.read
  .format("org.apache.phoenix.spark")
  .option("table", "T1")
  .option("zkUrl", "jdbc:phoenix:zim1ext-vm.et-it.hs-offenburg.de:2181:/hbase-unsecure")
  .load()
readDF.printSchema

readDF: org.apache.spark.sql.DataFrame = [A: string, B: int, C: int]
root
 |-- A: string (nullable = true)
 |-- B: integer (nullable = true)
 |-- C: integer (nullable = true)

val newDF = sqlContext.read
  .format("com.databricks.spark.csv")
  .option("header", "true")
  .option("inferSchema", "true")
  .option("delimiter", ";")
  .load("/public/test4.csv")
newDF.printSchema

newDF: org.apache.spark.sql.DataFrame = [B: int, C: int, D: string]
root
 |-- B: integer (nullable = true)
 |-- C: integer (nullable = true)
 |-- D: string (nullable = true) // This is a new column that does not exist in T1!

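// A quick sanity check (sketch): compare the CSV schema with what was just read from T1.
// 'readDF' and 'newDF' are the DataFrames defined above; this only lists the column
// names that are present in the CSV but missing from the Phoenix table.
val missingInT1 = newDF.columns.toSet -- readDF.columns.toSet
println(s"Columns not present in T1: ${missingInT1.mkString(", ")}")   // -> D
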
newDF.write.format("org.apache.phoenix.spark")
  .mode("overwrite")
  .option("table", "T1")
  .option("zkUrl", "jdbc:phoenix:server-url:2181:/hbase-unsecure")
  .save()

org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 14.0 failed 4 times, most recent failure: Lost task 0.3 in stage 14.0 (TID 31, server-url): java.sql.SQLException: Unable to resolve these column names:
D
Available columns with column families:
0.A,B,0.C
    at org.apache.phoenix.util.PhoenixRuntime.generateColumnInfo(PhoenixRuntime.java:475)
    at org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil.getUpsertColumnMetadataList(PhoenixConfigurationUtil.java:252)
    at org.apache.phoenix.spark.DataFrameFunctions$$anonfun$1.apply(DataFrameFunctions.scala:48)
    at org.apache.phoenix.spark.DataFrameFunctions$$anonfun$1.apply(DataFrameFunctions.scala:44)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$22.apply(RDD.scala:717)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$22.apply(RDD.scala:717)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:313)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:277)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
    at org.apache.spark.scheduler.Task.run(Task.scala:89)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:227)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1433)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1421)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1420)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1420)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:801)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:801)
    at scala.Option.foreach(Option.scala:236)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:801)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:1642)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1601)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:1590)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:622)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1831)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1844)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1921)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1144)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1074)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1074)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:323)
    at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopDataset(PairRDDFunctions.scala:1074)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply$mcV$sp(PairRDDFunctions.scala:994)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply(PairRDDFunctions.scala:985)
    at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopFile$2.apply(PairRDDFunctions.scala:985)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:150)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:111)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:323)
    at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopFile(PairRDDFunctions.scala:985)
    at org.apache.phoenix.spark.DataFrameFunctions.saveToPhoenix(DataFrameFunctions.scala:58)
    at org.apache.phoenix.spark.DataFrameFunctions.saveToPhoenix(DataFrameFunctions.scala:27)
    at org.apache.phoenix.spark.DefaultSource.createRelation(DefaultSource.scala:47)
    at org.apache.spark.sql.execution.datasources.ResolvedDataSource$.apply(ResolvedDataSource.scala:222)
    at org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:148)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:32)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:37)
    at $iwC$$iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:39)
    at $iwC$$iwC$$iwC$$iwC$$iwC.<init>(<console>:41)
    at $iwC$$iwC$$iwC$$iwC.<init>(<console>:43)
    at $iwC$$iwC$$iwC.<init>(<console>:45)
    at $iwC$$iwC.<init>(<console>:47)
    at $iwC.<init>(<console>:49)
    at <init>(<console>:51)
    at .<init>(<console>:55)
    at .<clinit>(<console>)
    at .<init>(<console>:7)
    at .<clinit>(<console>)
    at $print(<console>)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.spark.repl.SparkIMain$ReadEvalPrint.call(SparkIMain.scala:1065)
    at org.apache.spark.repl.SparkIMain$Request.loadAndRun(SparkIMain.scala:1346)
    at org.apache.spark.repl.SparkIMain.loadAndRunReq$1(SparkIMain.scala:840)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:871)
    at org.apache.spark.repl.SparkIMain.interpret(SparkIMain.scala:819)
    at sun.reflect.GeneratedMethodAccessor35.invoke(Unknown Source)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.zeppelin.spark.Utils.invokeMethod(Utils.java:38)
    at org.apache.zeppelin.spark.SparkInterpreter.interpret(SparkInterpreter.java:991)
    at org.apache.zeppelin.spark.SparkInterpreter.interpretInput(SparkInterpreter.java:1197)
    at org.apache.zeppelin.spark.SparkInterpreter.interpret(SparkInterpreter.java:1164)
    at org.apache.zeppelin.spark.SparkInterpreter.interpret(SparkInterpreter.java:1157)
    at org.apache.zeppelin.interpreter.LazyOpenInterpreter.interpret(LazyOpenInterpreter.java:101)
    at org.apache.zeppelin.interpreter.remote.RemoteInterpreterServer$InterpretJob.jobRun(RemoteInterpreterServer.java:502)
    at org.apache.zeppelin.scheduler.Job.run(Job.java:175)
    at org.apache.zeppelin.scheduler.FIFOScheduler$1.run(FIFOScheduler.java:139)
    at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
    at java.util.concurrent.FutureTask.run(FutureTask.java:266)
    at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$201(ScheduledThreadPoolExecutor.java:180)
    at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:293)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
Caused by: java.sql.SQLException: Unable to resolve these column names:
D
Available columns with column families:
0.A,B,0.C
    at org.apache.phoenix.util.PhoenixRuntime.generateColumnInfo(PhoenixRuntime.java:475)
    at org.apache.phoenix.mapreduce.util.PhoenixConfigurationUtil.getUpsertColumnMetadataList(PhoenixConfigurationUtil.java:252)
    at org.apache.phoenix.spark.DataFrameFunctions$$anonfun$1.apply(DataFrameFunctions.scala:48)
    at org.apache.phoenix.spark.DataFrameFunctions$$anonfun$1.apply(DataFrameFunctions.scala:44)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$22.apply(RDD.scala:717)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitions$1$$anonfun$apply$22.apply(RDD.scala:717)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:38)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:313)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:277)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
    at org.apache.spark.scheduler.Task.run(Task.scala:89)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:227)
    ... 3 more
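
// Why it fails: T1 only has columns A, B and C, so the Phoenix writer cannot map the
// DataFrame's D column to anything. Two possible ways out (a sketch, following the same
// ALTER TABLE pattern used for column C above; D's VARCHAR type is an assumption made
// to match the string column inferred from the CSV):

// Option 1: add the missing column to the Phoenix table first, then retry the save
ALTER TABLE t1 ADD IF NOT EXISTS d VARCHAR;

// Option 2: drop the extra column on the Spark side so the DataFrame matches T1's schema
newDF.drop("D").write.format("org.apache.phoenix.spark")
  .mode("overwrite")
  .option("table", "T1")
  .option("zkUrl", "jdbc:phoenix:server-url:2181:/hbase-unsecure")
  .save()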