// Input data (illustrative layout of the two text columns):
//
// +-----------------+---------------------+
// | document1       | document2           |
// +-----------------+---------------------+
// | word1 word2     | word2 word3         |
// +-----------------+---------------------+

// ---------------------------------------------------------------------------
// Approach 1: register a two-argument UDF and call it from Spark SQL.
// ---------------------------------------------------------------------------

// UDF2 must carry its type arguments: against the raw interface the abstract
// method erases to call(Object, Object), so call(String, String) would not
// override it and the anonymous class would not compile.
sqlContext.udf().register("intersection", new UDF2<String, String, Double>() {
    @Override
    public Double call(String arg, String arg2) throws Exception {
        // `inter` is defined outside this snippet; its result is widened to
        // double before being boxed as the UDF's return value.
        double key = inter(arg, arg2);
        return key;
    }
}, DataTypes.DoubleType);

v.registerTempTable("v_table");

// FIX: the select list must not end with a comma. With "... as RowKey1, from v_table"
// the FROM clause is lost (Spark 2.x appears to read `from` as one more select-list
// item aliased `v_table`), so no input columns are available to resolve `document`.
// NOTE(review): Dataset is used as a raw type throughout this snippet; presumably
// Dataset<Row> was intended — confirm against the file's imports.
Dataset df = sqlContext.sql("select document, document1, "
        + "intersection(document, document1) as RowKey1"
        + " from v_table");
df.show();

// Output reported for the ORIGINAL query (the one with the stray comma before `from`):
//
// INFO SparkSqlParser: Parsing command: select document, document1, intersection(v.col(document), v.col(document1)) as RowKey1, from v_table
// Exception in thread "main" org.apache.spark.sql.AnalysisException: cannot resolve '`document`' given input columns: []; line 1 pos 7
//
// Schema of the registered table:
//
// root
//  |-- document: string (nullable = true)
//  |-- document1: string (nullable = true)

// ---------------------------------------------------------------------------
// Approach 2: no UDF — split each document into words, explode the words into
// rows, and inner-join the two word lists on equality.
// ---------------------------------------------------------------------------

Dataset ds = spark.sql("select 'word1 word2' as document1, 'word2 word3' as document2");
ds.show();

// One row per space-separated word of each document column.
Dataset ds1 = ds.select(functions.explode(functions.split(ds.col("document1"), " ")).as("word"));
Dataset ds2 = ds.select(functions.explode(functions.split(ds.col("document2"), " ")).as("word"));

// Words present in both documents survive the inner join.
Dataset intersection = ds1.join(ds2, ds1.col("word").equalTo(ds2.col("word")))
        .select(ds1.col("word").as("Common words"));
intersection.show();

// Output of ds.show():
//
// +-----------+-----------+
// |  document1|  document2|
// +-----------+-----------+
// |word1 word2|word2 word3|
// +-----------+-----------+
//
// Output of intersection.show():
//
// +------------+
// |Common words|
// +------------+
// |       word2|
// +------------+