Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Environment setup for launching PySpark against the Spark install under
# /opt/local/spark. Intended to be sourced (or pasted) into the shell that
# will start Python, so the exported variables are visible to that process.
#
# Exports: SPARK_HOME, PYSPARK_PYTHON, PYSPARK_DRIVER_PYTHON, PYTHONPATH,
#          PYSPARK_SUBMIT_ARGS

export SPARK_HOME=/opt/local/spark

# The same interpreter path is used for both PYSPARK_PYTHON and
# PYSPARK_DRIVER_PYTHON.
export PYSPARK_PYTHON=/opt/local/python-3.5.1/bin/python3
export PYSPARK_DRIVER_PYTHON=/opt/local/python-3.5.1/bin/python3

# Prepend Spark's python directory and its bundled py4j source zip(s) to
# PYTHONPATH. A glob is used instead of parsing `ls` output so that:
#   * several matching zips do not inject a newline into PYTHONPATH,
#   * a missing zip does not leave an empty leading path entry.
_pyspark_path="${SPARK_HOME}/python"
_py4j_found=0
for _py4j_zip in "${SPARK_HOME}"/python/lib/py4j-*-src.zip; do
  # An unmatched glob stays literal, so confirm the file really exists.
  if [ -e "${_py4j_zip}" ]; then
    _pyspark_path="${_py4j_zip}:${_pyspark_path}"
    _py4j_found=1
  fi
done
if [ "${_py4j_found}" -eq 0 ]; then
  printf 'warning: no py4j-*-src.zip found under %s/python/lib\n' \
    "${SPARK_HOME}" >&2
fi
# Only add the separator when a previous PYTHONPATH exists; a trailing ':'
# would otherwise create an empty path entry.
export PYTHONPATH="${_pyspark_path}${PYTHONPATH:+:${PYTHONPATH}}"
unset _pyspark_path _py4j_zip _py4j_found

# Submit arguments, one option per line for readability. The value is a
# single string containing newlines and single-quoted option values.
# NOTE(review): this relies on the consumer splitting the string with
# shell-like quoting rules (presumably PySpark's gateway launcher) - confirm
# for the Spark version in use.
# "pyspark-shell" is kept as the final token, as in the original value.
export PYSPARK_SUBMIT_ARGS="
--packages com.amazonaws:aws-java-sdk-pom:1.11.8,org.apache.hadoop:hadoop-aws:2.7.2
--conf 'spark.local.dir=/mnt/ephemeral/tmp/spark'
--driver-java-options '-XX:+UseG1GC -XX:G1HeapRegionSize=32m -XX:+ParallelRefProcEnabled -XX:MaxGCPauseMillis=300 -XX:InitiatingHeapOccupancyPercent=35'
--driver-library-path '/opt/local/hadoop/lib/native'
--conf 'spark.driver.memory=2g'
--conf 'spark.driver.maxResultSize=2g'
--conf 'spark.executor.memory=45g'
--conf 'spark.executor.extraJavaOptions=-XX:+UseG1GC -XX:G1HeapRegionSize=32m -XX:+ParallelRefProcEnabled -XX:MaxGCPauseMillis=300 -XX:InitiatingHeapOccupancyPercent=35'
--conf 'spark.executor.extraLibraryPath=/opt/local/hadoop/lib/native'
--conf 'spark.executorEnv.LD_PRELOAD=/usr/lib/libjemalloc.so'
--conf 'spark.network.timeout=600s'
--conf 'spark.io.compression.codec=lz4'
--conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer'
--conf 'spark.kryo.referenceTracking=false'
--conf 'spark.shuffle.io.numConnectionsPerPeer=4'
--conf 'spark.sql.inMemoryColumnarStorage.batchSize=20000'
--conf 'spark.sql.autoBroadcastJoinThreshold=104857600'
--conf 'spark.sql.shuffle.partitions=800'
pyspark-shell
"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement