Advertisement
Guest User

Untitled

a guest
Jul 6th, 2012
147
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. // I think <algorithm> is unused.
  2. #include <algorithm>
  3. #include <iostream>
  4. #include <map>
  5. #include <string>
  6. #include <vector>
  7.  
  8. using namespace std;
  9.  
  10. // Consider std::pair instead on std::vector<std::string>.
  11. // Consider std::unordered_map instead of std::map if you have access to it.
  12. // Somme people would rather see this whole thing wrapped in a class with a
  13. // overload on operator().  The reasoning being that the constructor of the
  14. // map runs before main() which hurts program start time.
  15. map< vector< string >, int > lev_cache;
  16.  
  17. // There is a perf argument for passing in 4 string iterators.  But that will
  18. // make the code somewhat less obvious.  Also it will make it so the cache is
  19. // only useful for a single top level call to levenshtein_distance.
  20. int levenshtein_distance(string l1, string l2) {
  21.     // I'm not crazy about this early out.  I'm not sure if it's a win for performance.
  22.     if (l1 == l2) return 0;
  23.     if (l1 == "" && l2 != "") return l2.length();
  24.     if (l1 != "" && l2 == "") return l1.length();
  25.    
  26.     // You're looking for the element twice in the case it's found, consider instead.
  27.     // std::map<std::pair<std::string, std::string>, int>::iterator it = lev_cache.find(std::make_pair(l1, l2));
  28.     // if (it != lev_cache.end())
  29.     //  return *it;
  30.     vector< string > temp; temp.push_back(l1); temp.push_back(l2); //vector< string > temp = {l1, l2};
  31.     if (lev_cache.count(temp) > 0) return lev_cache[temp];
  32.    
  33.     // Consider combining these and making them const where apropriate.
  34.     // Consider making ll_1 and ll_2 chars.
  35.     // const char l1_1 = l1[0];
  36.     // const std::string l1_rest = l1.substr(1);
  37.     // const char l2_1 = l2[0];
  38.     // const std::string l2_rest = l2.substr(2);
  39.     string l1_1, l1_rest, l2_1, l2_rest;
  40.     l1_1 = l1.substr(0,1);
  41.     l1_rest = l1.substr(1);
  42.     l2_1 = l2.substr(0,1);
  43.     l2_rest = l2.substr(1);
  44.     if (l2_1 == l1_1) {
  45.             int d = levenshtein_distance(l1_rest, l2_rest);
  46.             // Looks like you're putting the wrong thing into the memorization table.  _rest, _rest should be in there from the recursive call.
  47.             // lev_cache[std::make_pair(l1, l2)] = d;
  48.             temp.clear(); temp.push_back(l1_rest); temp.push_back(l2_rest); //temp = {l1_rest, l2_rest};
  49.             lev_cache[temp] = d;
  50.             return d;
  51.     }
  52.     // Again it looks like you're putting the result of the recursive call into the table.  It's simpler to just put the result into the table.
  53.     int m1 = levenshtein_distance(l1_rest, l2);
  54.     temp.clear(); temp.push_back(l1_rest); temp.push_back(l2); //temp = {l1_rest, l2};
  55.     lev_cache[temp] = m1;
  56.     int m2 = levenshtein_distance(l1, l2_rest);
  57.     temp.clear(); temp.push_back(l1); temp.push_back(l2_rest); //temp = {l1, l2_rest};
  58.     lev_cache[temp] = m2;
  59.     int m3 = levenshtein_distance(l1_rest, l2_rest);
  60.     temp.clear(); temp.push_back(l1_rest); temp.push_back(l2_rest); //temp = {l1_rest, l2_rest};
  61.     lev_cache[temp] = m3;
  62.     return 1 + min(min(m1, m2), m3);
  63. }
  64.  
  65. int main(int argc, char **argv)
  66. {
  67.     cout << "Distance: " << levenshtein_distance("the quick brown fox jumps over the lazy dog", "pack my box with five dozen liquor jugs") << "\n";
  68.     //cout << "Distance: " << levenshtein_distance("the quick brown fox", "pack my box") << "\n";
  69.  
  70.     // return is technically not required for main (it's a special case), but I'd throw in return 0;
  71. }
  72.  
  73. #if 0
  74.  
  75. #include <iostream>
  76. #include <string>
  77. #include <unordered_map>
  78. #include <utility>
  79.  
  80. namespace std {
  81.     template<>
  82.     struct hash<std::pair<std::string, std::string> > {
  83.         size_t operator()(const std::pair<std::string, std::string> &p) const {
  84.             return std::hash<std::string>()(p.first) ^ std::hash<std::string>()(p.second);
  85.         }
  86.     };
  87. }
  88.  
  89. std::unordered_map<std::pair<std::string, std::string>, int> lev_cache;
  90.  
  91. int levenshtein_distance(std::string l1, std::string l2) {
  92.     if (l1.empty())
  93.         return l2.length();
  94.     if (l2.empty())
  95.         return l1.length();
  96.  
  97.     std::unordered_map<std::pair<std::string, std::string>, int>::iterator it = lev_cache.find(std::make_pair(l1, l2));
  98.     if (it != lev_cache.end())
  99.         return it->second;
  100.  
  101.     const char l1_1 = l1[0];
  102.     const std::string l1_rest = l1.substr(1);
  103.     const char l2_1 = l2[0];
  104.     const std::string l2_rest = l2.substr(1);
  105.     if (l2_1 == l1_1) {
  106.             const int d = levenshtein_distance(l1_rest, l2_rest);
  107.             lev_cache[std::make_pair(l1, l2)] = d;
  108.             return d;
  109.     }
  110.  
  111.     const int d = 1 + std::min(std::min(levenshtein_distance(l1_rest, l2),
  112.                         levenshtein_distance(l1, l2_rest)),
  113.                    levenshtein_distance(l1_rest, l2_rest));
  114.     lev_cache[std::make_pair(l1, l2)] = d;
  115.     return d;
  116. }
  117.  
  118. int main(int argc, char **argv)
  119. {
  120.     std::cout << "Distance: " << levenshtein_distance("the quick brown fox jumps over the lazy dog", "pack my box with five dozen liquor jugs") << "\n";
  121.  
  122.     return 0;
  123. }
  124.  
  125. #endif
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement