from pyspark import SparkContext

sc = SparkContext(appName="simple app")

# Set the S3 credentials on the underlying Hadoop configuration used by textFile()
sc._jsc.hadoopConfiguration().set("fs.s3n.awsAccessKeyId", "yourAccessKeyId")
sc._jsc.hadoopConfiguration().set("fs.s3n.awsSecretAccessKey", "yourSecretAccessKey")

# Read the S3 object as an RDD of lines
text_file = sc.textFile("s3n://bucketName/filename.tar.gz")

# Word count: split each line into words, pair each word with 1, then sum per word
counts = text_file.flatMap(lambda line: line.split(" ")) \
    .map(lambda word: (word, 1)) \
    .reduceByKey(lambda a, b: a + b)
counts.saveAsTextFile("output")
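
The snippet above reaches through the private sc._jsc handle to touch the Hadoop configuration. A minimal alternative sketch, assuming a Spark build whose "spark.hadoop." property prefix is forwarded to the Hadoop configuration, is to pass the same credentials through SparkConf (the key and bucket values remain the placeholders from the paste):

from pyspark import SparkConf, SparkContext

# Properties prefixed with "spark.hadoop." are copied into the Hadoop
# configuration that textFile() reads, so no private _jsc access is needed.
conf = (SparkConf()
        .setAppName("simple app")
        .set("spark.hadoop.fs.s3n.awsAccessKeyId", "yourAccessKeyId")
        .set("spark.hadoop.fs.s3n.awsSecretAccessKey", "yourSecretAccessKey"))

sc = SparkContext(conf=conf)
text_file = sc.textFile("s3n://bucketName/filename.tar.gz")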