Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- JavaSparkContext sc = new JavaSparkContext(new SparkConf().setAppName("SparkJdbcDs").setMaster("local[*]"));
- SQLContext sqlContext = new SQLContext(sc);
- SparkSession spark = SparkSession.builder().appName("JavaTokenizerExample").getOrCreate();
- Properties connectionProperties = new Properties();
- connectionProperties.put("user", "xxxx");
- connectionProperties.put("password","xxx");
- connectionProperties.put("partitionColumn", "HASH_CODE");
- connectionProperties.put("dbtable", "query");
- connectionProperties.put("columnName", "HASH_CODE");
- connectionProperties.put("lowerBound", "0");
- connectionProperties.put("upperBound", "8");
- connectionProperties.put("numPartitions", "8");
- String query="(SELECT mod(IM.ITEM_ID,8) as HASH_CODE,TO_CHAR(IM.ITEM_ID), IM.UPC, IM.PART_NUMBER,IM.MANUFACTURER_PART_NUMBER,M.MANUFACTURER_NAME,M.MANUFACTURER_ID,LIM.SHORT_DESC,T.TAXONOMY_ID FROM ITEM_MASTER IM,PRODUCTS P,MANUFACTURER M,LOC_ITEM_MASTER LIM,TAXONOMIES T,TAXONOMY_TREE TT,ITEM_CLASSIFICATION IC WHERE P.PRODUCT_ID(+)=IM.PRODUCT_ID AND M.MANUFACTURER_ID(+)=IM.MANUFACTURER_ID AND LIM.ITEM_ID(+)=IM.ITEM_ID AND TT.TAXONOMY_ID(+)=T.TAXONOMY_ID AND TT.TAXONOMY_TREE_ID=IC.TAXONOMY_TREE_ID AND IC.ITEM_ID(+)=IM.ITEM_ID AND T.TAXONOMY_ID='756' AND IM.Active != 'D')";
- Dataset<Row> targetData = spark.read().jdbc("jdbc:oracle:thin:@//xxx.xxx.xxx.xx/xxxx", query,connectionProperties);
- targetData.show();
- Dataset<Row> sourceData=sqlContext.read().format("com.crealytics.spark.excel").option("location", "D:\5Kto10K.xlsx").option("useHeader", "true")
- .option("inferSchema", "false").option("treatEmptyValuesAsNulls", "true").option("addColorColumns", "false").load();
- sourceData.show(500,false);
- Dataset<Row> joinedData =targetData.join(broadcast(sourceData),targetData.col("UPC").contains(sourceData.col("UpcSource")),"inner");
- joinedData.show(500,false);
Add Comment
Please, Sign In to add comment