// Question: given a DataFrame loaded from a CSV, how do I sum the "steps" column?
// (CSV.load is the asker's own loader helper; DataFrame has no sum(colName)
// method, so the second line is the desired API, not working code.)
val df = CSV.load(args(0))
val sumSteps = df.sum("steps")
// Answer 1: drop to the underlying RDD and reduce.
import sqlContext.implicits._
import org.apache.spark.sql.functions._

val df = sc.parallelize(Array(10, 2, 3, 4)).toDF("steps")
df.select(col("steps")).rdd.map(_(0).asInstanceOf[Int]).reduce(_ + _)
// res1: Int = 19
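
A typed-Dataset variant of the same idea, as a minimal sketch (not from the original thread), assuming Spark 2.x with a SparkSession named spark; it avoids the asInstanceOf cast:

// Sketch: typed Dataset reduce instead of the raw-RDD cast.
import spark.implicits._

val total: Int = Seq(10, 2, 3, 4).toDF("steps")
  .select("steps").as[Int]   // single-column DataFrame -> Dataset[Int]
  .reduce(_ + _)             // total = 19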
// Answer 2: stay in the DataFrame API and aggregate with sum().
import org.apache.spark.sql.functions._

val df = CSV.load(args(0))
val sumSteps = df.agg(sum("steps")).first.get(0)

// If you want a typed result, cast before extracting:
val sumSteps: Long = df.agg(sum("steps").cast("long")).first.getLong(0)

// Several columns can be summed in one pass:
val sums = df.agg(sum("col1").as("sum_col1"), sum("col2").as("sum_col2"), ...).first
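
To pull individual values back out of that multi-column Row, Row.getAs works by field name; a short sketch, assuming the summed columns are integer-typed so their sums come back as Long:

// Hypothetical follow-up: extract named fields from the aggregated Row.
val sumCol1 = sums.getAs[Long]("sum_col1")
val sumCol2 = sums.getAs[Long]("sum_col2")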
// Answer 3: groupBy with no keys aggregates over the whole DataFrame;
// sum() with no arguments sums every numeric column.
df.groupBy().sum()

# PySpark equivalent (note: groupBy() takes no key here; grouping by 'steps'
# would instead sum within each distinct value of that column):
df.groupBy().sum('steps').show()
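
Putting the agg approach into a self-contained program, as a runnable sketch under assumed details (the sample data, object name, and app name are made up for illustration):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.sum

object SumSteps {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("sum-steps")    // hypothetical app name
      .master("local[*]")
      .getOrCreate()
    import spark.implicits._

    val df = Seq(10, 2, 3, 4).toDF("steps")   // made-up sample data

    // agg with no grouping keys returns a single row holding the column total.
    val total = df.agg(sum("steps")).first.getLong(0)
    println(s"total steps = $total")          // prints: total steps = 19

    spark.stop()
  }
}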