Guest User

Untitled

a guest
Dec 14th, 2017
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.87 KB | None | 0 0
  1. JavaSparkContext sc = new JavaSparkContext(new SparkConf().setAppName("SparkJdbcDs").setMaster("local[*]"));
  2. SQLContext sqlContext = new SQLContext(sc);
  3. SparkSession spark = SparkSession.builder().appName("JavaTokenizerExample").getOrCreate();
  4. Properties connectionProperties = new Properties();
  5. connectionProperties.put("user", "xxxx");
  6. connectionProperties.put("password","xxx");
  7. connectionProperties.put("partitionColumn", "HASH_CODE");
  8. connectionProperties.put("dbtable", "query");
  9. connectionProperties.put("columnName", "HASH_CODE");
  10. connectionProperties.put("lowerBound", "0");
  11. connectionProperties.put("upperBound", "8");
  12. connectionProperties.put("numPartitions", "8");
  13.  
  14.  
  15. String query="(SELECT mod(IM.ITEM_ID,8) as HASH_CODE,TO_CHAR(IM.ITEM_ID), IM.UPC, IM.PART_NUMBER,IM.MANUFACTURER_PART_NUMBER,M.MANUFACTURER_NAME,M.MANUFACTURER_ID,LIM.SHORT_DESC,T.TAXONOMY_ID FROM ITEM_MASTER IM,PRODUCTS P,MANUFACTURER M,LOC_ITEM_MASTER LIM,TAXONOMIES T,TAXONOMY_TREE TT,ITEM_CLASSIFICATION IC WHERE P.PRODUCT_ID(+)=IM.PRODUCT_ID AND M.MANUFACTURER_ID(+)=IM.MANUFACTURER_ID AND LIM.ITEM_ID(+)=IM.ITEM_ID AND TT.TAXONOMY_ID(+)=T.TAXONOMY_ID AND TT.TAXONOMY_TREE_ID=IC.TAXONOMY_TREE_ID AND IC.ITEM_ID(+)=IM.ITEM_ID AND T.TAXONOMY_ID='756' AND IM.Active != 'D')";
  16.  
  17.  
  18. Dataset<Row> targetData = spark.read().jdbc("jdbc:oracle:thin:@//xxx.xxx.xxx.xx/xxxx", query,connectionProperties);
  19. targetData.show();
  20.  
  21. Dataset<Row> sourceData=sqlContext.read().format("com.crealytics.spark.excel").option("location", "D:\5Kto10K.xlsx").option("useHeader", "true")
  22. .option("inferSchema", "false").option("treatEmptyValuesAsNulls", "true").option("addColorColumns", "false").load();
  23. sourceData.show(500,false);
  24.  
  25. Dataset<Row> joinedData =targetData.join(broadcast(sourceData),targetData.col("UPC").contains(sourceData.col("UpcSource")),"inner");
  26. joinedData.show(500,false);
Add Comment
Please, Sign In to add comment