Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- SparkSession spark = SparkSession.builder().master("local[6]").appName("simple").getOrCreate();
- JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
- // RDD<Tuple2<Text, Text>> f = sc.sequenceFile("hdfs://88.197.53.103:9000/part-m-00005", Text.class, Text.class);
- // sc.textFile("hdfs://88.197.53.103:9000/copyleft").take(1);
- JavaPairRDD<Text, Text> rdd = sc.sequenceFile("/Users/antleb/Downloads/part-m-00000", Text.class, Text.class);
- Dataset<Row> df = spark.read().json(rdd.map(t -> {
- return t._2.toString();
- }));
- df.createOrReplaceTempView("oaf");
- spark.sql("select entity from oaf where kind='entity' and dataInfo.deletedbyinference=false").createOrReplaceTempView("entity");
- // spark.sql("select entity.type, count(*) from entity group by entity.type").show();
- for (String table: new String[] {"datasource", "organization"})
- // for (String table: new String[] {"result", "datasource", "organization", "project"})
- spark.sql("select entity." + table + ", entity.id from entity").createOrReplaceTempView(table);
- spark.sql("select * from entity").printSchema();
- spark.sql("select * from datasource").printSchema();
- spark.sql("select id, datasource.metadata.officialname.value, datasource.metadata.datasourcetype.classname as type, datasource.metadata.openairecompatibility.classname as compatibility, datasource.metadata.dateofvalidation.value as dateofvalidation, datasource.metadata.dateofvalidation.value as yearofvalidation from datasource").show();
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement