import sys
sys.path.insert(0, '.')

# Importing from pyspark.shell starts a local Spark session and exposes the same
# ready-made sc / sqlContext objects that the interactive pyspark shell provides.
from pyspark.shell import sc, sqlContext
from pyspark import SparkContext, SparkConf  # only referenced by the commented-out RDD variant below
from pyspark.sql.functions import explode


def strip(line: str):
    # Crude text parser for the commented-out textFile() variant: lines ending in a
    # comma are sliced past the leading tabs and read as ints, otherwise as floats.
    if line[-1] == ',':
        return int(line[3:-1])
    else:
        return float(line[4:-1])


def mapper(line):
    # Debug helper: print the row and return the first element of its 'col' field (currently unused).
    print(line)
    return line['col'][0]


if __name__ == "__main__":
    # conf = SparkConf().setAppName("airports").setMaster("local[*]")
    # sc = SparkContext(conf=conf)
    # json = sc.textFile("dataMay-31-2017.json")
    # jsonCol = json.filter(lambda line: '\t\t\t' in line)
    # jsonCol = jsonCol.map(strip)
    # print(jsonCol.collect())

    # Read the multi-line JSON file and explode its 'data' array into one row per element.
    df = sqlContext.read.json('dataMay-31-2017.json', multiLine=True)
    ndf = df.select(explode(df.data))

    data = []
    paralData = sc.parallelize(data)  # parallelize() is an instance method, so call it on sc (this RDD is currently unused)

    # Turn each exploded row into a plain tuple and take the smallest one.
    pairTuples = ndf.rdd.map(lambda r: tuple(r.col))
    asd = pairTuples.min()
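    # Illustrative sketch only, with made-up sample pairs (not taken from dataMay-31-2017.json):
    # RDD.min() on tuples compares them lexicographically, element by element, so the
    # tuple with the smallest first field wins. That is the value asd holds above.
    sample = sc.parallelize([(3, 'c'), (1, 'a'), (2, 'b')])
    print(sample.min())  # -> (1, 'a')
    print(asd)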