Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- +-----------------+---------------------+
- | document1 | document2 |
- +-----------------+---------------------+
- | word1 word2 | word2 word3 |
- +-----------------+---------------------+
- // Register a SQL-callable UDF named "intersection": it receives two string
- // arguments and returns the double produced by inter(...), a helper that is
- // defined outside this snippet.
- sqlContext.udf().register("intersection", new UDF2<String, String, Double>() {
-     @Override
-     public Double call(String arg, String arg2) throws Exception {
-         // Store the result in a primitive double before it is boxed to Double on return.
-         double key = inter(arg, arg2);
-         return key;
-     }
- }, DataTypes.DoubleType);
- // Make the dataset v queryable from SQL under the name "v_table".
- v.registerTempTable("v_table");
- // Note: there must be no comma after the last select item; the previous
- // "... as RowKey1, from v_table" text is not valid SQL.
- Dataset<Row> df = sqlContext.sql("select document, document1, "
-         + "intersection(document, document1) as RowKey1"
-         + " from v_table");
- df.show();
- INFO SparkSqlParser: Parsing command: select document, document1, intersection(v.col(document), v.col(document1)) as RowKey1, from v_table
- Exception in thread "main" org.apache.spark.sql.AnalysisException: cannot resolve '`document`' given input columns: []; line 1 pos 7
- root
- |-- document: string (nullable = true)
- |-- document1: string (nullable = true)
- // Build a one-row dataset holding the two sample documents and print it.
- Dataset<Row> docs = spark.sql("select 'word1 word2' as document1, 'word2 word3' as document2");
- docs.show();
- // Split each document on a single space and explode it into one row per word.
- Dataset<Row> firstWords = docs.select(
-         functions.explode(functions.split(docs.col("document1"), " ")).as("word"));
- Dataset<Row> secondWords = docs.select(
-         functions.explode(functions.split(docs.col("document2"), " ")).as("word"));
- // Join the two word lists on equal words, then keep the word from the first
- // document under the column name "Common words".
- Dataset<Row> matched = firstWords.join(secondWords,
-         firstWords.col("word").equalTo(secondWords.col("word")));
- Dataset<Row> common = matched.select(firstWords.col("word").as("Common words"));
- common.show();
- +-----------+-----------+
- | document1| document2|
- +-----------+-----------+
- |word1 word2|word2 word3|
- +-----------+-----------+
- +------------+
- |Common words|
- +------------+
- | word2|
- +------------+
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement