import findspark

# Point findspark at the local Spark installation so pyspark can be imported.
findspark.init("/usr/local/spark/")
import pyspark

findspark.find()


def create_sc():
    """Build (or rebuild) a SparkContext with dynamic allocation enabled."""
    sc_conf = pyspark.SparkConf().setMaster("local")
    sc_conf.setAppName("Anna")
    # Let Spark scale the executor count up and down with the workload.
    sc_conf.set("spark.dynamicAllocation.enabled", "true")
    sc_conf.set("spark.shuffle.service.enabled", "true")
    # Kryo is faster and more compact than the default Java serializer.
    sc_conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    sc_conf.set("spark.executor.memory", "32g")
    # Read Parquet binary columns back as strings.
    sc_conf.set("spark.sql.parquet.binaryAsString", "true")
    # Use Apache Arrow for pandas <-> Spark DataFrame conversion.
    sc_conf.set("spark.sql.execution.arrow.enabled", "true")
    sc_conf.set("spark.dynamicAllocation.minExecutors", "5")
    sc_conf.set("spark.dynamicAllocation.maxExecutors", "30")
    sc_conf.set("spark.dynamicAllocation.initialExecutors", "10")
    # Raise the RPC message size cap (in MiB) for large task results.
    sc_conf.set("spark.rpc.message.maxSize", "1024")

    print(sc_conf.getAll())
    global sc

    try:
        # Only one SparkContext may be active per JVM, so stop any
        # existing context before creating a new one.
        if sc is not None:
            sc.stop()
        sc1 = pyspark.SparkContext(conf=sc_conf)
    except NameError:
        # sc was never defined, so there is nothing to stop.
        sc1 = pyspark.SparkContext(conf=sc_conf)
    return sc1


def dir_exist(sc, path):
    """Return True if `path` exists on the Hadoop filesystem and is a directory."""
    fs = sc._jvm.org.apache.hadoop.fs.FileSystem.get(sc._jsc.hadoopConfiguration())
    jpath = sc._jvm.org.apache.hadoop.fs.Path(path)
    return fs.exists(jpath) and fs.isDirectory(jpath)


sc = create_sc()
sqlContext = pyspark.SQLContext(sc)
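
# A minimal usage sketch: guard a Parquet load with dir_exist so a missing
# input directory is handled gracefully instead of raising an AnalysisException.
# "/data/events.parquet" is a hypothetical path used only for illustration.
if dir_exist(sc, "/data/events.parquet"):
    df = sqlContext.read.parquet("/data/events.parquet")
    df.printSchema()
else:
    print("input directory not found, skipping load")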