Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import sys
- sys.path.insert(0, '.')
- from pyspark import SparkContext, SparkConf
- SparkContext.setSystemProperty('spark.executor.memory', '100g')
- from Utils import Utils
def splitComma(line):
    """Format one airport record as ``"<name>, <latitude>"``.

    The line is tokenised with ``Utils.COMMA_DELIMITER`` (presumably a
    compiled regex that splits on field-separating commas -- confirm in
    the Utils module). Per the column list in this file's task description,
    field 1 is the airport name and field 6 is the latitude.

    Both fields are emitted exactly as they appear in the input, so any
    quoting around the name is preserved.

    Raises:
        IndexError: if the line yields fewer than seven fields.
    """
    fields = Utils.COMMA_DELIMITER.split(line)
    airport_name = fields[1]
    latitude = fields[6]
    return "{}, {}".format(airport_name, latitude)
- '''
- Create a Spark program to read the airport data from in/airports.text and find all the airports whose latitude is greater than 40.
- Then output the airport's name and the airport's latitude to out/airports_by_latitude.text.
- Each row of the input file contains the following columns:
- Airport ID, Name of airport, Main city served by airport, Country where airport is located, IATA/FAA code,
- ICAO Code, Latitude, Longitude, Altitude, Timezone, DST, Timezone in Olson format
- from pyspark import SparkContext
- SparkContext.setSystemProperty('spark.executor.memory', '2g')
- sc = SparkContext("local", "App Name")
- Sample output:
- "St Anthony", 51.391944
- "Tofino", 49.082222
- ...
- '''
if __name__ == "__main__":
    # Script entry point: read the airport records, keep those whose latitude
    # is above 40, and write "<name>, <latitude>" lines to the output path.
    conf = SparkConf().setAppName("airports").setMaster("local[4]")
    sc = SparkContext(conf=conf)
    try:
        airports = sc.textFile("in/airports.text")

        # Field index 6 is the latitude column. float() raises ValueError on a
        # non-numeric value, which fails the job rather than silently
        # dropping the record.
        airports_above_40 = airports.filter(
            lambda line: float(Utils.COMMA_DELIMITER.split(line)[6]) > 40
        )

        airport_names_and_latitudes = airports_above_40.map(splitComma)

        # NOTE(review): the task description in this file names
        # out/airports_by_latitude.text as the target; the path below is kept
        # as written so existing consumers are unaffected -- confirm which
        # one is intended.
        airport_names_and_latitudes.saveAsTextFile("out/lattitudeoverforty.text")
    finally:
        # Always release the Spark context, even when a stage fails, so the
        # local Spark resources are not left running.
        sc.stop()
Add Comment
Please, Sign In to add comment