Advertisement
Guest User

Untitled

a guest
Jan 17th, 2018
52
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.28 KB | None | 0 0
  1. #MACIEJ SASINOWSKI SZYMON PISZCZATOWSKI
  2. #execute : spark-submit /script 2000 1 1
  3. from __future__ import print_function
  4. import sys
  5. import time
  6. from pyspark.sql import SparkSession
  7.  
  def getFirst(listofnames):
      """Return the first item of *listofnames*, or ``None`` if there is none.

      A falsy argument (``None``, an empty list, an empty string, ...) is
      treated like an empty sequence, so the result is ``None`` instead of an
      exception.
      """
      return next(iter(listofnames or []), None)
  10.  
  def main():
      """Print the most frequent value of every attribute per decision class.

      Reads the CSV at the hard-coded path (first line is the header,
      malformed records are dropped).  For each distinct value found in the
      column at index 4 (the decision), the rows carrying that decision are
      selected and, for every column except the last one, the value that
      occurs most often is determined together with its occurrence count.

      For each decision the decision value is printed, followed by a list of
      ``{"name": <column>, "value": (<value>, <count>)}`` dictionaries.
      The Spark session is stopped even when one of the jobs fails.
      """
      spark = SparkSession.builder.appName("RPJS").getOrCreate()
      try:
          frame = spark.read.csv(
              "/home/r/Pulpit/grypa.csv", header="true", mode="DROPMALFORMED"
          )
          rows = frame.rdd

          # All columns but the last one are analysed.  The names are taken
          # straight from the DataFrame instead of converting the RDD back
          # with toDF(), which would only re-derive the same names.
          columnsnames = frame.columns[:-1]

          # NOTE(review): the decision is read from fixed index 4 while the
          # attribute list drops the *last* column - these agree only for a
          # five-column file; confirm against the data.
          uniqueDecisionClass = rows.map(lambda x: x[4]).distinct().collect()

          for decision in uniqueDecisionClass:
              # Bind the loop variable as a default so the predicate does not
              # depend on when Spark serialises the closure.
              selected = rows.filter(
                  lambda x, decision=decision: x[4] == decision
              )

              # Fresh accumulator for every decision.  The original reset a
              # differently-cased name, so results piled up across classes.
              listOfEverything = []
              print(decision)

              for name in columnsnames:
                  counts = selected.map(
                      lambda x, name=name: (x[name], 1)
                  ).reduceByKey(lambda a, b: a + b)
                  # Highest count first; the head is the most frequent value.
                  ranked = counts.sortBy(lambda x: x[1], ascending=False)
                  listOfEverything.append(
                      {"name": name, "value": ranked.take(1)[0]}
                  )

              # NOTE(review): the paste lost its indentation; reporting once
              # per decision (right after its columns) is assumed here.
              print(listOfEverything)
      finally:
          spark.stop()
  43.  
  44. #filtered.take(10)
  # Run the analysis only when executed as a script (e.g. via spark-submit),
  # not when the module is imported.
  if __name__ == "__main__":
      main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement