Untitled

+-----------------+---------------------+
|      document1  |   document2         |
+-----------------+---------------------+
|    word1 word2  |   word2 word3       |
+-----------------+---------------------+

// Spark SQL function "intersection": takes two string arguments and yields a
// double. The actual computation is delegated to inter(...), which is defined
// outside this snippet.
UDF2<String, String, Double> intersectionUdf = new UDF2<String, String, Double>() {
    /** Passes both arguments to inter(...) and returns its result boxed as a Double. */
    @Override
    public Double call(String arg, String arg2) throws Exception {
        final double score = inter(arg, arg2);
        return score;
    }
};
sqlContext.udf().register("intersection", intersectionUdf, DataTypes.DoubleType);
  // Expose v to SQL queries under the name "v_table".
  // registerTempTable is deprecated since Spark 2.0; createOrReplaceTempView is
  // the replacement it delegates to. Assumes v is a Dataset (Spark 2.x), as the
  // rest of this snippet suggests - TODO confirm.
  v.createOrReplaceTempView("v_table");

// Select both text columns plus the result of the "intersection" UDF for each
// row of v_table, then print the result.
// The last select item must NOT be followed by a comma: with
// "... as RowKey1, from v_table" the parser does not see a FROM clause, which
// matches the failure "cannot resolve '`document`' given input columns: []".
Dataset<Row> df = sqlContext.sql("select document, document1, "
                + "intersection(document, document1) as RowKey1"
                + " from v_table");
        df.show();

INFO SparkSqlParser: Parsing command: select document, document1, intersection(v.col(document), v.col(document1)) as RowKey1, from v_table
Exception in thread "main" org.apache.spark.sql.AnalysisException: cannot resolve '`document`' given input columns: []; line 1 pos 7

root
 |-- document: string (nullable = true)
 |-- document1: string (nullable = true)

// Single-row example input: two columns, each holding space-separated words.
Dataset<Row> ds = spark.sql("select 'word1 word2' as document1, 'word2 word3' as document2");
ds.show();

// Split each document on a single space and explode the pieces so that every
// word becomes its own row in a column named "word".
Dataset<Row> firstWords = ds.select(
        functions.explode(functions.split(ds.col("document1"), " ")).as("word"));
Dataset<Row> secondWords = ds.select(
        functions.explode(functions.split(ds.col("document2"), " ")).as("word"));

// Join the two word lists on equal words, then keep only the word taken from
// the first document, labelled "Common words".
Dataset<Row> matched = firstWords.join(
        secondWords,
        firstWords.col("word").equalTo(secondWords.col("word")));
Dataset<Row> intersection = matched.select(firstWords.col("word").as("Common words"));
intersection.show();

+-----------+-----------+
|  document1|  document2|
+-----------+-----------+
|word1 word2|word2 word3|
+-----------+-----------+
+------------+
|Common words|
+------------+
|       word2|
+------------+