Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Word-count a text file stored on S3 with PySpark and save the result.

Reads the object line by line, splits on single spaces, counts word
occurrences with reduceByKey, and writes the (word, count) pairs to the
local/HDFS directory ``output``.
"""
import os

from pyspark import SparkContext


def main() -> None:
    """Run the word-count job end to end, releasing the context on exit."""
    sc = SparkContext(appName="simple app")
    try:
        # SECURITY: never hard-code AWS credentials in source control.
        # Prefer IAM roles / instance profiles; here we fall back to the
        # standard environment variables, keeping the original placeholders
        # as last-resort defaults so behavior is unchanged out of the box.
        access_key = os.environ.get("AWS_ACCESS_KEY_ID", "yourAccessKeyId")
        secret_key = os.environ.get("AWS_SECRET_ACCESS_KEY", "yourSecretAccessKey")
        hadoop_conf = sc._jsc.hadoopConfiguration()
        hadoop_conf.set("fs.s3n.awsAccessKeyId", access_key)
        hadoop_conf.set("fs.s3n.awsSecretAccessKey", secret_key)

        # NOTE(review): textFile() transparently decompresses .gz, but a
        # .tar.gz is a gzipped *tar archive* — tar headers and padding will
        # show up as garbage "lines" in the RDD. Use a plain .gz (or extract
        # the archive first); also s3n:// is legacy — s3a:// is the
        # maintained connector on modern Hadoop. TODO confirm cluster setup.
        text_file = sc.textFile("s3n://bucketName/filename.tar.gz")

        # Classic word count: one (word, 1) pair per token, summed per key.
        counts = (
            text_file.flatMap(lambda line: line.split(" "))
            .map(lambda word: (word, 1))
            .reduceByKey(lambda a, b: a + b)
        )
        counts.saveAsTextFile("output")
    finally:
        # Always release cluster resources, even if the job fails mid-way.
        sc.stop()


if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement