Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- : org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 1.0 failed 1 times, most recent failure: Lost task 0.0 in stage 1.0 (TID 1, localhost, executor driver): java.lang.NoClassDefFoundError: com/twitter/jsr166e/LongAdder
# Standalone PySpark script: read a CSV file into a DataFrame and append its
# rows to a Cassandra table through the DataStax Spark Cassandra Connector.
import sys
import uuid
import time
import os

# PYSPARK_SUBMIT_ARGS has to be in the environment before the SparkContext is
# created, because that is when the extra packages are resolved.
# com.twitter:jsr166e is listed explicitly next to the connector: when that
# jar is missing from the classpath the write stage aborts with
# "java.lang.NoClassDefFoundError: com/twitter/jsr166e/LongAdder".
os.environ['PYSPARK_SUBMIT_ARGS'] = (
    '--packages '
    'com.datastax.spark:spark-cassandra-connector_2.11:2.4.0,'
    'com.twitter:jsr166e:1.1.0 '
    'pyspark-shell'
)

try:
    from pyspark import SparkContext
    from pyspark import SparkConf
    from pyspark.sql import SparkSession
    from itertools import islice
    from pyspark.sql import SQLContext
    from pyspark.sql.types import *
    from pyspark.sql import Row
    from datetime import datetime
except ImportError as e:
    # Without pyspark nothing below can run; report and exit non-zero.
    print("error importing spark modules", e)
    sys.exit(1)

# The chained builder calls are wrapped in parentheses so the expression can
# legally span several lines.
conf = (
    SparkConf()
    .setAppName("Stand Alone Python Script")
    .setMaster("local[*]")
    .setAll([
        ('spark.executor.memory', '8g'),
        ('spark.executor.cores', '3'),
        ('spark.cores.max', '3'),
        ('spark.cassandra.connection.host', 'cassandra_ip'),
        ('spark.cassandra.auth.username', 'cassandra_user_name'),
        ('spark.cassandra.auth.password', 'cassandra_password'),
        ('spark.driver.memory', '8g'),
    ])
)

sc = SparkContext(conf=conf)
try:
    sql_context = SQLContext(sc)

    # Header row supplies the column names; with inferSchema disabled every
    # column is loaded as a string.
    consumer_complaints = (
        sql_context.read
        .format("csv")
        .option("header", "true")
        .option("inferSchema", "false")
        .load("in/Consumer_Complaints.csv")
    )

    # Append the rows to the target Cassandra table.
    (
        consumer_complaints.write
        .format("org.apache.spark.sql.cassandra")
        .mode('append')
        .options(table="table_name", keyspace="space_name")
        .save()
    )
finally:
    # Always release the SparkContext, even when the read or write fails.
    sc.stop()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement