Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import com.datastax.spark.connector.cql.CassandraConnector
- import org.apache.spark.{SparkContext, SparkConf}
- import org.apache.spark.sql.{Row, SQLContext}
/**
 * Spark SQL: Txt, Parquet, JSON Support with the Spark Cassandra Connector.
 *
 * Demo job: parses two in-memory JSON documents with Spark SQL, saves them to
 * the Cassandra table `githubstats.monthly_commits`, then reads that table
 * back and prints every row.
 */
object SampleJson {
  import com.datastax.spark.connector._
  import GitHubEvents._

  /**
   * Entry point.
   *
   * An explicit `main` is used instead of `extends App`: Spark's documentation
   * advises against `scala.App` because its delayed initialisation may not
   * work correctly with Spark applications.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf(true)
      // Disabled in the original; enable both for a local run against 127.0.0.1:
      // .set("spark.cassandra.connection.host", "127.0.0.1")
      // .setMaster("local[*]")
      .setAppName("app2")

    val sc = new SparkContext(conf)
    try {
      val sqlContext = new SQLContext(sc)

      val json = sc.parallelize(Seq(
        """{"user":"helena","commits":98, "month":12, "year":2014}""",
        """{"user":"pkolaczk", "commits":42, "month":12, "year":2014}"""))

      // MonthlyCommits.apply(Row) reads columns by position as
      // (String, Int, Int, Int). The column order and integer width of a schema
      // inferred from JSON are not guaranteed to match that, so both are pinned
      // here with an explicit projection instead of relying on inference.
      sqlContext.jsonRDD(json).registerTempTable("monthly_commits_json")
      val projection =
        "SELECT user, CAST(commits AS INT), CAST(month AS INT), CAST(year AS INT) " +
          "FROM monthly_commits_json"

      sqlContext
        .sql(projection)
        .map(MonthlyCommits(_))
        .saveToCassandra("githubstats", "monthly_commits")

      sc.cassandraTable[MonthlyCommits]("githubstats", "monthly_commits")
        .collect()
        .foreach(println)
    } finally {
      // Always release the context, even when the job above fails.
      sc.stop()
    }
  }
}
/** Domain model for GitHub statistics stored in Cassandra. */
object GitHubEvents {

  /** Root of the statistics hierarchy; serializable so instances can be shipped in Spark tasks. */
  sealed trait Stat extends Serializable

  /** Marker for user-level statistics. */
  trait User extends Stat

  /** Marker for repository-level statistics. */
  trait Repo extends Stat

  /** Marker for commit-level statistics. */
  trait Commits extends Stat

  /**
   * Number of commits made by one user in one month.
   *
   * @param user    user name
   * @param commits commit count
   * @param month   month value
   * @param year    year value
   */
  case class MonthlyCommits(user: String, commits: Int, month: Int, year: Int) extends Commits

  object MonthlyCommits {

    /**
     * Builds a [[MonthlyCommits]] from a Spark SQL row, reading columns strictly
     * by position: 0 = user (String), 1 = commits (Int), 2 = month (Int),
     * 3 = year (Int).
     *
     * NOTE(review): callers must supply rows in exactly this column order and
     * with Int-typed numeric columns; a schema inferred from JSON may not
     * satisfy that, so confirm against the row source.
     *
     * @param r row holding the four columns in the order described above
     * @return the populated record
     */
    def apply(r: Row): MonthlyCommits = {
      val userName    = r.getString(0)
      val commitCount = r.getInt(1)
      val monthValue  = r.getInt(2)
      val yearValue   = r.getInt(3)
      new MonthlyCommits(userName, commitCount, monthValue, yearValue)
    }
  }
}
Add Comment
Please, Sign In to add comment