Advertisement
Guest User

Untitled

a guest
Mar 20th, 2019
89
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.77 KB | None | 0 0
  1. +-----------------+---------------------+
  2. | document1 | document2 |
  3. +-----------------+---------------------+
  4. | word1 word2 | word2 word3 |
  5. +-----------------+---------------------+
  6.  
  7. sqlContext.udf().register("intersection", new UDF2<String, String, Double>() { // registers a two-string UDF callable from SQL as intersection(a, b)
  8. @Override
  9. public Double call(String left, String right) throws Exception {
  10. // Delegates to inter(...), defined outside this snippet; the result is returned as a Double to match DataTypes.DoubleType below.
  11. return (double) inter(left, right);
  12. }
  13. }, DataTypes.DoubleType);
  14. v.createOrReplaceTempView("v_table"); // replaces registerTempTable, which is deprecated on Dataset since Spark 2.0; same view name
  15.  
  16. Dataset<Row> df = sqlContext.sql("select document, document1, " // selects both text columns plus the UDF result from v_table
  17. + "intersection(document, document1) as RowKey1" // no comma after the last select item: a trailing comma before "from" broke the query (the logged error reported no input columns)
  18. + " from v_table");
  19. df.show();
  20.  
  21. INFO SparkSqlParser: Parsing command: select document, document1, intersection(v.col(document), v.col(document1)) as RowKey1, from v_table
  22. Exception in thread "main" org.apache.spark.sql.AnalysisException: cannot resolve '`document`' given input columns: []; line 1 pos 7
  23.  
  24. root
  25. |-- document: string (nullable = true)
  26. |-- document1: string (nullable = true)
  27.  
  28. Dataset<Row> docs = spark.sql("select 'word1 word2' as document1, 'word2 word3' as document2");
  29. docs.show();
  30. // Split each document column on single spaces and explode the result into one row per word.
  31. Dataset<Row> leftWords = docs.select(functions.explode(functions.split(docs.col("document1"), " ")).alias("word"));
  32. Dataset<Row> rightWords = docs.select(functions.explode(functions.split(docs.col("document2"), " ")).alias("word"));
  33. // Inner join on equal words keeps only the words that occur in both documents.
  34. Dataset<Row> commonWords = leftWords.join(rightWords, leftWords.col("word").equalTo(rightWords.col("word")), "inner").select(leftWords.col("word").alias("Common words"));
  35. commonWords.show();
  36.  
  37. +-----------+-----------+
  38. | document1| document2|
  39. +-----------+-----------+
  40. |word1 word2|word2 word3|
  41. +-----------+-----------+
  42. +------------+
  43. |Common words|
  44. +------------+
  45. | word2|
  46. +------------+
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement