Advertisement
Guest User

Untitled

a guest
Mar 26th, 2019
59
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 0.85 KB | None | 0 0
  1. import static org.apache.spark.sql.functions.*;
  2. import org.apache.spark.sql.expressions.UserDefinedFunction;
  3.  
  4.  
  5. StructField [] sf1 = new StructField[] {
  6. DataTypes.createStructField("uid",DataTypes.IntegerType, true),
  7. DataTypes.createStructField("mid",DataTypes.IntegerType,true),
  8. DataTypes.createStructField("rating",DataTypes.IntegerType, true),
  9. DataTypes.createStructField("time",DataTypes.IntegerType, true),
  10. };
  11.  
  12. StructType st1 = DataTypes.createStructType(sf1);
  13.  
  14.  
  15. Dataset<Row> mv = spark
  16. .read()
  17. .schema(st1)
  18. .format("com.databricks.spark.csv")
  19. .option("delimiter", "\t")
  20.  
  21. .csv("/home/hasura/Desktop/SparkData/u.data");
  22.  
  23.  
  24.  
  25. UserDefinedFunction increaserating = udf(
  26. (Integer s) -> s+1,DataTypes.IntegerType
  27. );
  28.  
  29. mv.
  30. withColumn("rating",increaserating.apply(mv.col("rating")))
  31. .show();
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement