Advertisement
Guest User

Untitled

a guest
Jun 18th, 2019
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.30 KB | None | 0 0
  1. for(int i = 0 ; i <graph.getNodes().size(); i++) {
  2. for(int j = i +1; j < graph.getNodes().size();j++) {
  3. HashSet<String> spaceSet = new HashSet<String>();
  4. spaceSet.addAll(graph.getNodes().get(i).getTokenSet());
  5. spaceSet.addAll(graph.getNodes().get(j).getTokenSet());
  6. HashSet<String> closeSet = new HashSet<String>();
  7. HashMap<String,Double> closeMapMax = new HashMap<String,Double>();
  8. HashMap<String,Double> secondMap = new HashMap<String, Double>();
  9.  
  10. for(int k = 0; k < g.getNameAttributes().get(i).getValueToken().size(); k++) {
  11. for(int l = 0; l < g.getNameAttributes().get(j).getValueToken().size(); l++) {
  12. String a1 = g.getNameAttributes().get(i).getValueToken().get(k);
  13. String a2 = g.getNameAttributes().get(j).getValueToken().get(l);
  14. spaceSet.add(a1);
  15. spaceSet.add(a2);
  16. double jaro = TFIDF.applyJaroWinkler(a1, a2);
  17. if(jaro > 0.75) {
  18. closeSet.add(a1);
  19. }
  20. }
  21.  
  22. for( String close: closeSet) {
  23. double max;
  24. for(int l = 0; l < g.getNameAttributes().get(j).getValueToken().size(); l++) {
  25. double jaro = TFIDF.applyJaroWinkler(g.getNameAttributes().get(j).getValueToken().get(l), close);
  26. if(secondMap.containsKey(g.getNameAttributes().get(j).getValueToken().get(l))){
  27. if(secondMap.get(g.getNameAttributes().get(j).getValueToken().get(l)) < jaro){
  28. secondMap.put(g.getNameAttributes().get(j).getValueToken().get(l), jaro);
  29. }else continue;
  30. }else {
  31. secondMap.put(g.getNameAttributes().get(j).getValueToken().get(l), jaro);
  32. }
  33. }
  34. max = Collections.max(secondMap.values());
  35. closeMapMax.put(close, max);
  36. }
  37. }
  38.  
  39. double sumSoft = 0.0;
  40. for (String term : closeSet) {
  41. double tfidfI = g.getNodes().get(i).getTfIdfScore().get(term);
  42. HashMap.Entry<String, Double> maxEntry = null;
  43. for (HashMap.Entry<String, Double> entry : secondMap.entrySet()) {
  44. if (maxEntry == null || entry.getValue().compareTo(maxEntry.getValue()) > 0){
  45. maxEntry = entry;
  46. }
  47. }
  48. String termJ = maxEntry.getKey();
  49. double tfidfJ = g.getNodes().get(j).getTfIdfScore().get(termJ);
  50. double jMax = secondMap.get(termJ);
  51. double unitI = 0.0;
  52. double unitJ = 0.0;
  53. for(String s : spaceSet) {
  54. if(g.getNameAttributes().get(i).getValueToken().contains(s)) {
  55. unitI = unitI + g.getNodes().get(i).getTfIdfScore().get(s)*g.getNodes().get(i).getTfIdfScore().get(s);
  56. }
  57. if(g.getNameAttributes().get(j).getValueToken().contains(s)) {
  58. unitJ = unitJ + g.getNodes().get(j).getTfIdfScore().get(s)*g.getNodes().get(j).getTfIdfScore().get(s);
  59. }
  60. }
  61. sumSoft = sumSoft + (tfidfI / (java.lang.Math.sqrt(unitI))) * (tfidfJ / (java.lang.Math.sqrt(unitJ)))* jMax;
  62. }
  63.  
  64. Compare String "RianAir Corp" with "RiadAir Inc"
  65. Compare Token: RianAir RiadAir 0.9333333333333333
  66. Compare Token: RianAir Inc 0.4920634920634921
  67. Compare Token: Corp RiadAir 0.0
  68. Compare Token: Corp Inc 0.0
  69.  
  70. SpaceSet [RiadAir, RianAir, Corp, Inc]
  71. CloseSet [RianAir]
  72. Most Similar Token in Second Node: RiadAir
  73. RianAir tf-idf-score: 1.0397207708399179
  74. RiadAir tf-idf-score: 0.4377343686769499
  75. Similarity between Token 0.9333333333333333
  76. Soft-TF-IDF Distance: 0.6388541633769426
  77. Normal Jaro-Winkler-Distance: 0.817929292929293
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement