Advertisement
Guest User

Untitled

a guest
Oct 19th, 2011
26
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.68 KB | None | 0 0
  1. private void calculateMIScores(){
  2. DataSourceFrequency dsf1 = new MemoryBackedDataSourceFrequency();
  3.  
  4. ArrayList<String> columnsToEvaluate = new ArrayList<String>();
  5. Iterator<MatchingConfig> it = rm_conf.getMatchingConfigs().iterator();
  6.  
  7. while(it.hasNext()){
  8. MatchingConfig mc = it.next();
  9. for(String s: mc.getBlockingColumns()){
  10. columnsToEvaluate.add(s);
  11. }
  12. for(String s: mc.getIncludedColumnsNames()){
  13. columnsToEvaluate.add(s);
  14. }
  15. }
  16.  
  17. Object[] obj = dsf1.getFields().toArray();
  18. ArrayList<ColumnPair> colum_pair_list = new ArrayList<ColumnPair>();
  19.  
  20. final ArrayList<String> collected = new ArrayList<String>();
  21. for(int x = 0; x < columnsToEvaluate.size(); x++) {
  22. for(int y = 0; y < columnsToEvaluate.size(); y++) {
  23. ColumnPair column_pair = new ColumnPair();
  24.  
  25. if(y == x) continue;
  26.  
  27. final String p1 =columnsToEvaluate.get(x) +" "+ columnsToEvaluate.get(y);
  28. final String p2 = columnsToEvaluate.get(y) +" "+ columnsToEvaluate.get(x);
  29. if(!collected.contains(p1) && !collected.contains(p2)) {
  30. collected.add(p1);
  31. column_pair.setColumnA(new Column(columnsToEvaluate.get(x),dsf1));
  32. column_pair.setColumnB(new Column(columnsToEvaluate.get(y),dsf1));
  33. colum_pair_list.add(column_pair);
  34. }
  35. }
  36. }
  37.  
  38. MICalculator mi_calculator = new MICalculator();
  39. mi_calculator.setColumn_pair_list(colum_pair_list);
  40.  
  41. rm_conf.setDataSourceFrequency1(dsf1);
  42.  
  43. FrequencyAnalyzer fa1 = new FrequencyAnalyzer(rm_conf.getLinkDataSource1(), null, dsf1);
  44.  
  45. ReaderProvider rp = ReaderProvider.getInstance();
  46.  
  47. System.out.println("Calculating pairwise combinations for included columns in Dataset A...");
  48. DataSourceAnalysis dsa = new DataSourceAnalysis(rp.getReader(rm_conf.getLinkDataSource1()));
  49.  
  50. dsa.addAnalyzer(fa1);
  51. dsa.analyzeData();
  52.  
  53. for(ColumnPair cp : colum_pair_list){
  54. List<Column> columns = new ArrayList();
  55. columns.add(cp.getColumnA());
  56. columns.add(cp.getColumnB());
  57.  
  58. for(Column c : columns){
  59. Map<String, Integer> frequency = new HashMap();
  60. Set<String> tokens = dsf1.getTokens(c.getName());
  61.  
  62. for(String token: tokens){
  63. frequency.put(token, dsf1.getFrequency(c.getName(), token));
  64. }
  65. c.setFrequency(frequency);
  66. }
  67. }
  68.  
  69. ReaderProvider rp1 = ReaderProvider.getInstance();
  70.  
  71. DataSourceAnalysis dsa2 = new DataSourceAnalysis(rp1.getReader(rm_conf.getLinkDataSource1()));
  72. MutualInformationAnalyzer fa2 = new MutualInformationAnalyzer(rm_conf, mi_calculator);
  73. dsa2.addAnalyzer(fa2);
  74. dsa2.analyzeData();
  75. }
  76.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement