# Adjacent string literals concatenate into a single s3n URL of the form
#   s3n://<AWS Access Key ID>:<AWS Secret Access Key>@<mybucket>/data.txt
dataFile = ("s3n://<AWS Access Key ID>:"
            "<AWS Secret Access Key>"
            "@<mybucket>/data.txt")

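Running SimpleApp.py against this path fails as soon as count() forces the RDD to be evaluated:
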
Traceback (most recent call last):
  File "/home/adas/spark/SimpleApp.py", line 23, in <module>
    numAs = logData.filter(lambda s: 'a' in s).count()
  File "/home/adas/spark/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 1041, in count
  File "/home/adas/spark/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 1032, in sum
  File "/home/adas/spark/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 906, in fold
  File "/home/adas/spark/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 809, in collect
  File "/home/adas/spark/spark/python/lib/py4j-0.10.4-src.zip/py4j/java_gateway.py", line 1133, in __call__
  File "/home/adas/spark/spark/python/lib/py4j-0.10.4-src.zip/py4j/protocol.py", line 319, in get_return_value
py4j.protocol.Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
: java.lang.IllegalArgumentException: AWS Secret Access Key must be specified as the password of a s3n URL, or by setting the fs.s3n.awsSecretAccessKey property.
    at org.apache.hadoop.fs.s3.S3Credentials.initialize(S3Credentials.java:86)
    at org.apache.hadoop.fs.s3native.Jets3tNativeFileSystemStore.initialize(Jets3tNativeFileSystemStore.java:80)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invokeMethod(RetryInvocationHandler.java:191)
    at org.apache.hadoop.io.retry.RetryInvocationHandler.invoke(RetryInvocationHandler.java:102)
    at org.apache.hadoop.fs.s3native.$Proxy16.initialize(Unknown Source)
    at org.apache.hadoop.fs.s3native.NativeS3FileSystem.initialize(NativeS3FileSystem.java:334)
    at org.apache.hadoop.fs.FileSystem.createFileSystem(FileSystem.java:2669)
    at org.apache.hadoop.fs.FileSystem.access$200(FileSystem.java:94)
    at org.apache.hadoop.fs.FileSystem$Cache.getInternal(FileSystem.java:2703)
    at org.apache.hadoop.fs.FileSystem$Cache.get(FileSystem.java:2685)
    at org.apache.hadoop.fs.FileSystem.get(FileSystem.java:373)
    at org.apache.hadoop.fs.Path.getFileSystem(Path.java:295)
    at org.apache.hadoop.mapred.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:258)
    at org.apache.hadoop.mapred.FileInputFormat.listStatus(FileInputFormat.java:229)
    at org.apache.hadoop.mapred.FileInputFormat.getSplits(FileInputFormat.java:315)
    at org.apache.spark.rdd.HadoopRDD.getPartitions(HadoopRDD.scala:202)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:35)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.api.python.PythonRDD.getPartitions(PythonRDD.scala:53)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:252)
    at org.apache.spark.rdd.RDD$$anonfun$partitions$2.apply(RDD.scala:250)
    at scala.Option.getOrElse(Option.scala:121)
    at org.apache.spark.rdd.RDD.partitions(RDD.scala:250)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:1958)
    at org.apache.spark.rdd.RDD$$anonfun$collect$1.apply(RDD.scala:935)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
    at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
    at org.apache.spark.rdd.RDD.withScope(RDD.scala:362)
    at org.apache.spark.rdd.RDD.collect(RDD.scala:934)
    at org.apache.spark.api.python.PythonRDD$.collectAndServe(PythonRDD.scala:453)
    at org.apache.spark.api.python.PythonRDD.collectAndServe(PythonRDD.scala)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:280)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:214)
    at java.lang.Thread.run(Thread.java:745)
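
The exception names its own workaround: supply the secret through the fs.s3n.awsSecretAccessKey property instead of the user:password part of the URL. Hadoop's s3n credential parsing is known to choke on secret keys that contain "/" characters, so the URL form can fail even when the key is correct. Below is a minimal sketch of the configuration-based approach, assuming the same SimpleApp.py setup as above; the property names come from the exception message and its fs.s3n.awsAccessKeyId counterpart, the placeholder values are the ones used in the original snippet, and reaching the Hadoop Configuration through sc._jsc is a common PySpark idiom.

from pyspark import SparkContext

sc = SparkContext(appName="SimpleApp")

# Pass the AWS credentials via the Hadoop configuration rather than the URL.
hadoop_conf = sc._jsc.hadoopConfiguration()
hadoop_conf.set("fs.s3n.awsAccessKeyId", "<AWS Access Key ID>")
hadoop_conf.set("fs.s3n.awsSecretAccessKey", "<AWS Secret Access Key>")

# The path no longer needs the user:password section.
dataFile = "s3n://<mybucket>/data.txt"
logData = sc.textFile(dataFile)
numAs = logData.filter(lambda s: 'a' in s).count()

With the credentials held in the configuration they also stay out of the URL, so they cannot leak through logs or stack traces that echo the path.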