Advertisement
Guest User

Untitled

a guest
Oct 18th, 2019
152
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.60 KB | None | 0 0
  1. SparkSession spark = SparkSession.builder().master("local[6]").appName("simple").getOrCreate();
  2.  
  3. JavaSparkContext sc = new JavaSparkContext(spark.sparkContext());
  4.  
  5. // RDD<Tuple2<Text, Text>> f = sc.sequenceFile("hdfs://88.197.53.103:9000/part-m-00005", Text.class, Text.class);
  6.  
  7. // sc.textFile("hdfs://88.197.53.103:9000/copyleft").take(1);
  8.  
  9. JavaPairRDD<Text, Text> rdd = sc.sequenceFile("/Users/antleb/Downloads/part-m-00000", Text.class, Text.class);
  10.  
  11. Dataset<Row> df = spark.read().json(rdd.map(t -> {
  12. return t._2.toString();
  13. }));
  14.  
  15. df.createOrReplaceTempView("oaf");
  16.  
  17. spark.sql("select entity from oaf where kind='entity' and dataInfo.deletedbyinference=false").createOrReplaceTempView("entity");
  18. // spark.sql("select entity.type, count(*) from entity group by entity.type").show();
  19.  
  20. for (String table: new String[] {"datasource", "organization"})
  21. // for (String table: new String[] {"result", "datasource", "organization", "project"})
  22. spark.sql("select entity." + table + ", entity.id from entity").createOrReplaceTempView(table);
  23.  
  24. spark.sql("select * from entity").printSchema();
  25. spark.sql("select * from datasource").printSchema();
  26. spark.sql("select id, datasource.metadata.officialname.value, datasource.metadata.datasourcetype.classname as type, datasource.metadata.openairecompatibility.classname as compatibility, datasource.metadata.dateofvalidation.value as dateofvalidation, datasource.metadata.dateofvalidation.value as yearofvalidation from datasource").show();
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement