Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- private void calculateMIScores(){
- DataSourceFrequency dsf1 = new MemoryBackedDataSourceFrequency();
- ArrayList<String> columnsToEvaluate = new ArrayList<String>();
- Iterator<MatchingConfig> it = rm_conf.getMatchingConfigs().iterator();
- while(it.hasNext()){
- MatchingConfig mc = it.next();
- for(String s: mc.getBlockingColumns()){
- columnsToEvaluate.add(s);
- }
- for(String s: mc.getIncludedColumnsNames()){
- columnsToEvaluate.add(s);
- }
- }
- Object[] obj = dsf1.getFields().toArray();
- ArrayList<ColumnPair> colum_pair_list = new ArrayList<ColumnPair>();
- final ArrayList<String> collected = new ArrayList<String>();
- for(int x = 0; x < columnsToEvaluate.size(); x++) {
- for(int y = 0; y < columnsToEvaluate.size(); y++) {
- ColumnPair column_pair = new ColumnPair();
- if(y == x) continue;
- final String p1 =columnsToEvaluate.get(x) +" "+ columnsToEvaluate.get(y);
- final String p2 = columnsToEvaluate.get(y) +" "+ columnsToEvaluate.get(x);
- if(!collected.contains(p1) && !collected.contains(p2)) {
- collected.add(p1);
- column_pair.setColumnA(new Column(columnsToEvaluate.get(x),dsf1));
- column_pair.setColumnB(new Column(columnsToEvaluate.get(y),dsf1));
- colum_pair_list.add(column_pair);
- }
- }
- }
- MICalculator mi_calculator = new MICalculator();
- mi_calculator.setColumn_pair_list(colum_pair_list);
- rm_conf.setDataSourceFrequency1(dsf1);
- FrequencyAnalyzer fa1 = new FrequencyAnalyzer(rm_conf.getLinkDataSource1(), null, dsf1);
- ReaderProvider rp = ReaderProvider.getInstance();
- System.out.println("Calculating pairwise combinations for included columns in Dataset A...");
- DataSourceAnalysis dsa = new DataSourceAnalysis(rp.getReader(rm_conf.getLinkDataSource1()));
- dsa.addAnalyzer(fa1);
- dsa.analyzeData();
- for(ColumnPair cp : colum_pair_list){
- List<Column> columns = new ArrayList();
- columns.add(cp.getColumnA());
- columns.add(cp.getColumnB());
- for(Column c : columns){
- Map<String, Integer> frequency = new HashMap();
- Set<String> tokens = dsf1.getTokens(c.getName());
- for(String token: tokens){
- frequency.put(token, dsf1.getFrequency(c.getName(), token));
- }
- c.setFrequency(frequency);
- }
- }
- ReaderProvider rp1 = ReaderProvider.getInstance();
- DataSourceAnalysis dsa2 = new DataSourceAnalysis(rp1.getReader(rm_conf.getLinkDataSource1()));
- MutualInformationAnalyzer fa2 = new MutualInformationAnalyzer(rm_conf, mi_calculator);
- dsa2.addAnalyzer(fa2);
- dsa2.analyzeData();
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement