Advertisement
Guest User

Untitled

a guest
Jun 30th, 2016
174
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.44 KB | None | 0 0
  # PySpark environment setup for the Spark install under /opt/local/spark.
  #
  # Source this snippet (or paste it into an interactive shell) so that the
  # exported variables persist in the calling shell. No `set -e` / `set -u`
  # here on purpose: shell options would leak into the sourcing shell.

  export SPARK_HOME=/opt/local/spark
  export PYSPARK_PYTHON=/opt/local/python-3.5.1/bin/python3
  export PYSPARK_DRIVER_PYTHON=/opt/local/python-3.5.1/bin/python3

  #######################################
  # Print the PYTHONPATH value needed to import PySpark.
  # Order: every py4j-*-src.zip under <spark_home>/python/lib, then
  # <spark_home>/python, then the previous PYTHONPATH (only if non-empty, so no
  # empty path entries are ever produced).
  # Arguments: $1 - Spark installation directory
  #            $2 - existing PYTHONPATH value (optional, may be empty)
  # Outputs:   the colon-separated path on stdout (no trailing newline)
  #######################################
  _pyspark_pythonpath() {
    local spark_home=$1
    local existing=${2-}
    local joined=''
    local zip
    # Glob instead of parsing `ls`; an unmatched pattern stays literal and is
    # filtered out by the -e check, so nullglob need not be touched.
    for zip in "${spark_home}"/python/lib/py4j-*-src.zip; do
      [[ -e "${zip}" ]] || continue
      joined+="${zip}:"
    done
    joined+="${spark_home}/python"
    printf '%s' "${joined}${existing:+:${existing}}"
  }

  PYTHONPATH=$(_pyspark_pythonpath "${SPARK_HOME}" "${PYTHONPATH-}")
  export PYTHONPATH

  # Command line handed to spark-submit when PySpark starts its JVM gateway.
  # The single quotes below are literal characters inside the double-quoted
  # string; the consumer of PYSPARK_SUBMIT_ARGS is expected to do the word
  # splitting and quote removal (NOTE(review): PySpark's gateway launcher does
  # this - confirm for the Spark version in use). The value ends with the
  # `pyspark-shell` primary resource.
  export PYSPARK_SUBMIT_ARGS="
  --packages com.amazonaws:aws-java-sdk-pom:1.11.8,org.apache.hadoop:hadoop-aws:2.7.2
  --conf 'spark.local.dir=/mnt/ephemeral/tmp/spark'
  --driver-java-options '-XX:+UseG1GC -XX:G1HeapRegionSize=32m -XX:+ParallelRefProcEnabled -XX:MaxGCPauseMillis=300 -XX:InitiatingHeapOccupancyPercent=35'
  --driver-library-path '/opt/local/hadoop/lib/native'
  --conf 'spark.driver.memory=2g'
  --conf 'spark.driver.maxResultSize=2g'
  --conf 'spark.executor.memory=45g'
  --conf 'spark.executor.extraJavaOptions=-XX:+UseG1GC -XX:G1HeapRegionSize=32m -XX:+ParallelRefProcEnabled -XX:MaxGCPauseMillis=300 -XX:InitiatingHeapOccupancyPercent=35'
  --conf 'spark.executor.extraLibraryPath=/opt/local/hadoop/lib/native'
  --conf 'spark.executorEnv.LD_PRELOAD=/usr/lib/libjemalloc.so'
  --conf 'spark.network.timeout=600s'
  --conf 'spark.io.compression.codec=lz4'
  --conf 'spark.serializer=org.apache.spark.serializer.KryoSerializer'
  --conf 'spark.kryo.referenceTracking=false'
  --conf 'spark.shuffle.io.numConnectionsPerPeer=4'
  --conf 'spark.sql.inMemoryColumnarStorage.batchSize=20000'
  --conf 'spark.sql.autoBroadcastJoinThreshold=104857600'
  --conf 'spark.sql.shuffle.partitions=800'
  pyspark-shell
  "
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement