import findspark
findspark.init("/usr/local/spark/")
findspark.find()

import pyspark
from pyspark.sql import SQLContext


def create_sc():
    # Local master with dynamic executor allocation; dynamic allocation
    # also requires the external shuffle service to be enabled.
    sc_conf = pyspark.SparkConf().setMaster("local")
    sc_conf.setAppName("Anna")
    sc_conf.set("spark.dynamicAllocation.enabled", "true")
    sc_conf.set("spark.shuffle.service.enabled", "true")
    sc_conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
    sc_conf.set("spark.executor.memory", "32g")
    sc_conf.set("spark.sql.parquet.binaryAsString", "true")
    sc_conf.set("spark.sql.execution.arrow.enabled", "true")
    sc_conf.set("spark.dynamicAllocation.minExecutors", "5")
    sc_conf.set("spark.dynamicAllocation.maxExecutors", "30")
    sc_conf.set("spark.dynamicAllocation.initialExecutors", "10")
    sc_conf.set("spark.rpc.message.maxSize", "1024")
    print(sc_conf.getAll())

    # Only one SparkContext may be active per JVM: stop any existing
    # context before creating a new one. NameError means sc was never
    # defined, so there is nothing to stop yet.
    global sc
    try:
        if sc is not None:
            sc.stop()
    except NameError:
        pass
    return pyspark.SparkContext(conf=sc_conf)


def dir_exist(sc, path):
    # Check via the JVM-side Hadoop FileSystem API that `path` exists
    # and is a directory.
    fs = sc._jvm.org.apache.hadoop.fs.FileSystem.get(sc._jsc.hadoopConfiguration())
    jpath = sc._jvm.org.apache.hadoop.fs.Path(path)
    return fs.exists(jpath) and fs.isDirectory(jpath)


sc = create_sc()
sqlContext = SQLContext(sc)
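
# For reference, a minimal usage sketch of how these helpers might be
# combined: guard a parquet read with dir_exist(), then convert a slice
# to pandas (the Arrow config above speeds up toPandas()). The path
# below is a hypothetical example, not part of the original paste.
data_path = "/data/events.parquet"  # hypothetical example path

if dir_exist(sc, data_path):
    df = sqlContext.read.parquet(data_path)
    df.printSchema()
    # spark.sql.execution.arrow.enabled=true accelerates this conversion
    pdf = df.limit(1000).toPandas()
else:
    print("No such directory: " + data_path)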