Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import static org.apache.spark.sql.functions.*;
- import org.apache.spark.sql.expressions.UserDefinedFunction;
- StructField [] sf1 = new StructField[] {
- DataTypes.createStructField("uid",DataTypes.IntegerType, true),
- DataTypes.createStructField("mid",DataTypes.IntegerType,true),
- DataTypes.createStructField("rating",DataTypes.IntegerType, true),
- DataTypes.createStructField("time",DataTypes.IntegerType, true),
- };
- StructType st1 = DataTypes.createStructType(sf1);
- Dataset<Row> mv = spark
- .read()
- .schema(st1)
- .format("com.databricks.spark.csv")
- .option("delimiter", "\t")
- .csv("/home/hasura/Desktop/SparkData/u.data");
- UserDefinedFunction increaserating = udf(
- (Integer s) -> s+1,DataTypes.IntegerType
- );
- mv.
- withColumn("rating",increaserating.apply(mv.col("rating")))
- .show();
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement