Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import java.io.*;
- import org.apache.spark.SparkConf;
- import org.apache.spark.SparkContext;
- import org.apache.spark.api.java.JavaSparkContext;
- import org.apache.spark.api.java.*;
- import org.apache.spark.api.java.function.*;
- import org.apache.spark.sql.hive.HiveContext;
- import org.apache.spark.sql.SchemaRDD;
- import org.apache.spark.sql.api.java.Row;
- import java.util.Arrays;
- import scala.Tuple2;
- public class SQL {
- public static void main(String[] args) {
- String inputFile = "/home/lorenzo/Desktop/testtweets.json";
- //String outputFile = "/words.txt";
- SparkConf conf = new SparkConf().setAppName("Simple Application");
- SparkContext sc = new SparkContext(conf);
- HiveContext hiveCtx = new HiveContext(sc);
- /*SchemaRDD input = hiveCtx.jsonFile(inputFile);
- input.printSchema();
- input.registerTempTable("tweets");
- SchemaRDD topTweets = hiveCtx.sql("EXPLAIN SELECT text, retweetCount FROM tweets ORDER BY retweetCount LIMIT 10");
- JavaRDD<String> topTweetText = topTweets.toJavaRDD().map((row) -> {
- return row.getString(0);});
- System.out.println("ciao" + topTweetText.collect());*/
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement