Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package com.example;
- import org.apache.spark.SparkContext;
- import org.apache.spark.api.java.JavaRDD;
- import org.apache.spark.api.java.function.Function;
- import org.apache.spark.sql.Dataset;
- import org.apache.spark.sql.Row;
- import org.apache.spark.sql.RowFactory;
- import org.apache.spark.sql.SparkSession;
- import org.apache.spark.sql.types.*;
- public class Main {
- public static void main(String[] args) {
- SparkSession spark = SparkSession.builder().master("local[*]").appName("Word2Vec").getOrCreate();
- SparkContext sc = spark.sparkContext();
- sc.setLogLevel("WARN");
- JavaRDD<String> lines = sc.textFile("input.txt", 10).toJavaRDD();
- JavaRDD<Row> rows = lines.map(new Function<String, Row>(){
- public Row call(String line){
- return RowFactory.create(new String[][] {line.split(" ")});
- }
- });
- StructType schema = new StructType(new StructField[] {
- new StructField("text", new ArrayType(DataTypes.StringType, true), false, Metadata.empty())
- });
- Dataset<Row> input = spark.createDataFrame(rows, schema);
- input.show(3);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement