Advertisement
Guest User

Untitled

a guest
Jun 24th, 2018
128
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.69 KB | None | 0 0
  1.  
  2. import findspark
  3. findspark.init('/home/patryk/Pobrane/spark-2.1.1-bin-hadoop2.7')
  4.  
  5. from pyspark.sql import SparkSession
  6. from pyspark.sql import SQLContext, SparkSession
  7. from pyspark import SparkContext, SparkConf
  8. import json
  9. import pyspark.sql.functions as f
  10. from pyspark.sql.functions import col, udf
  11. from pyspark.sql.types import *
  12.  
  13. sc = SparkContext()
  14.  
  15.  
  16. #PRZEROBIENIE PLIKU JSON NA FORMAT OBSLUGIWANY PRZEZ PYSPARK
  17. #original file was written with pretty-print inside a list
  18. # with open('dataMay-31-2017.json') as jsonfile:
  19. #     js = json.load(jsonfile)
  20. #
  21. # buff = dict()
  22. # #write a new file with one object per line
  23. # with open("flattened.json", 'w') as outfile:
  24. #     iterator = 0
  25. #     for d in js:
  26. #         buff[d] = js[d]
  27. #         json.dump(buff, outfile)
  28. #         outfile.write('\n')
  29.  
  30. sc = SparkContext.getOrCreate()
  31. sqlContext = SQLContext(sc)
  32.  
  33. spark = SparkSession(sc)
  34.  
  35. to_sort = sqlContext.read.json("flattened.json")
  36.  
  37.  
  38. selected = to_sort.select("*", f.explode("data").alias("exploded_data"))
  39.  
  40. selected = selected.select("exploded_data")
  41.  
  42. selected.show()
  43.  
  44. selected.registerTempTable("selected")
  45.  
  46.  
  47. same_wartosci = sqlContext.sql("select exploded_data from selected").rdd.map(lambda line: {line[0][0], float(line[0][1
  48.                                                                                                              ])}).reduce
  49. B
  50. yKey(lambda line: print(line))
  51. sorted_list = []
  52.  
  53. mini = f.min("exploded_data")
  54.  
  55. #print(same_wartosci.collect())
  56. # while same_wartosci.collect():
  57. #     minimum = same_wartosci.min()
  58. #     same_wartosci = same_wartosci.filter(lambda line: minimum != line)
  59. #     print(len(same_wartosci.collect()))
  60. #     print(minimum)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement