This week only. Pastebin PRO Accounts Christmas Special! Don't miss out!Want more features on Pastebin? Sign Up, it's FREE!
Guest

Untitled

By: a guest on Jul 6th, 2012  |  syntax: C++  |  size: 4.65 KB  |  views: 73  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
This paste has a previous version, view the difference. Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. // I think <algorithm> is unused.
  2. #include <algorithm>
  3. #include <iostream>
  4. #include <map>
  5. #include <string>
  6. #include <vector>
  7.  
  8. using namespace std;
  9.  
  10. // Consider std::pair instead on std::vector<std::string>.
  11. // Consider std::unordered_map instead of std::map if you have access to it.
  12. // Somme people would rather see this whole thing wrapped in a class with a
  13. // overload on operator().  The reasoning being that the constructor of the
  14. // map runs before main() which hurts program start time.
  15. map< vector< string >, int > lev_cache;
  16.  
  17. // There is a perf argument for passing in 4 string iterators.  But that will
  18. // make the code somewhat less obvious.  Also it will make it so the cache is
  19. // only useful for a single top level call to levenshtein_distance.
  20. int levenshtein_distance(string l1, string l2) {
  21.         // I'm not crazy about this early out.  I'm not sure if it's a win for performance.
  22.         if (l1 == l2) return 0;
  23.         if (l1 == "" && l2 != "") return l2.length();
  24.         if (l1 != "" && l2 == "") return l1.length();
  25.        
  26.         // You're looking for the element twice in the case it's found, consider instead.
  27.         // std::map<std::pair<std::string, std::string>, int>::iterator it = lev_cache.find(std::make_pair(l1, l2));
  28.         // if (it != lev_cache.end())
  29.         //      return *it;
  30.         vector< string > temp; temp.push_back(l1); temp.push_back(l2); //vector< string > temp = {l1, l2};
  31.         if (lev_cache.count(temp) > 0) return lev_cache[temp];
  32.        
  33.         // Consider combining these and making them const where apropriate.
  34.         // Consider making ll_1 and ll_2 chars.
  35.         // const char l1_1 = l1[0];
  36.         // const std::string l1_rest = l1.substr(1);
  37.         // const char l2_1 = l2[0];
  38.         // const std::string l2_rest = l2.substr(2);
  39.         string l1_1, l1_rest, l2_1, l2_rest;
  40.         l1_1 = l1.substr(0,1);
  41.         l1_rest = l1.substr(1);
  42.         l2_1 = l2.substr(0,1);
  43.         l2_rest = l2.substr(1);
  44.         if (l2_1 == l1_1) {
  45.                         int d = levenshtein_distance(l1_rest, l2_rest);
  46.                         // Looks like you're putting the wrong thing into the memorization table.  _rest, _rest should be in there from the recursive call.
  47.                         // lev_cache[std::make_pair(l1, l2)] = d;
  48.                         temp.clear(); temp.push_back(l1_rest); temp.push_back(l2_rest); //temp = {l1_rest, l2_rest};
  49.                         lev_cache[temp] = d;
  50.                         return d;
  51.         }
  52.         // Again it looks like you're putting the result of the recursive call into the table.  It's simpler to just put the result into the table.
  53.         int m1 = levenshtein_distance(l1_rest, l2);
  54.         temp.clear(); temp.push_back(l1_rest); temp.push_back(l2); //temp = {l1_rest, l2};
  55.         lev_cache[temp] = m1;
  56.         int m2 = levenshtein_distance(l1, l2_rest);
  57.         temp.clear(); temp.push_back(l1); temp.push_back(l2_rest); //temp = {l1, l2_rest};
  58.         lev_cache[temp] = m2;
  59.         int m3 = levenshtein_distance(l1_rest, l2_rest);
  60.         temp.clear(); temp.push_back(l1_rest); temp.push_back(l2_rest); //temp = {l1_rest, l2_rest};
  61.         lev_cache[temp] = m3;
  62.         return 1 + min(min(m1, m2), m3);
  63. }
  64.  
  65. int main(int argc, char **argv)
  66. {
  67.         cout << "Distance: " << levenshtein_distance("the quick brown fox jumps over the lazy dog", "pack my box with five dozen liquor jugs") << "\n";
  68.         //cout << "Distance: " << levenshtein_distance("the quick brown fox", "pack my box") << "\n";
  69.  
  70.         // return is technically not required for main (it's a special case), but I'd throw in return 0;
  71. }
  72.  
  73. #if 0
  74.  
  75. #include <iostream>
  76. #include <string>
  77. #include <unordered_map>
  78. #include <utility>
  79.  
  80. namespace std {
  81.         template<>
  82.         struct hash<std::pair<std::string, std::string> > {
  83.                 size_t operator()(const std::pair<std::string, std::string> &p) const {
  84.                         return std::hash<std::string>()(p.first) ^ std::hash<std::string>()(p.second);
  85.                 }
  86.         };
  87. }
  88.  
  89. std::unordered_map<std::pair<std::string, std::string>, int> lev_cache;
  90.  
  91. int levenshtein_distance(std::string l1, std::string l2) {
  92.         if (l1.empty())
  93.                 return l2.length();
  94.         if (l2.empty())
  95.                 return l1.length();
  96.  
  97.         std::unordered_map<std::pair<std::string, std::string>, int>::iterator it = lev_cache.find(std::make_pair(l1, l2));
  98.         if (it != lev_cache.end())
  99.                 return it->second;
  100.  
  101.         const char l1_1 = l1[0];
  102.         const std::string l1_rest = l1.substr(1);
  103.         const char l2_1 = l2[0];
  104.         const std::string l2_rest = l2.substr(1);
  105.         if (l2_1 == l1_1) {
  106.                         const int d = levenshtein_distance(l1_rest, l2_rest);
  107.                         lev_cache[std::make_pair(l1, l2)] = d;
  108.                         return d;
  109.         }
  110.  
  111.         const int d = 1 + std::min(std::min(levenshtein_distance(l1_rest, l2),
  112.                                             levenshtein_distance(l1, l2_rest)),
  113.                                    levenshtein_distance(l1_rest, l2_rest));
  114.         lev_cache[std::make_pair(l1, l2)] = d;
  115.         return d;
  116. }
  117.  
  118. int main(int argc, char **argv)
  119. {
  120.         std::cout << "Distance: " << levenshtein_distance("the quick brown fox jumps over the lazy dog", "pack my box with five dozen liquor jugs") << "\n";
  121.  
  122.         return 0;
  123. }
  124.  
  125. #endif
clone this paste RAW Paste Data