Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // I think <algorithm> is unused.
- #include <algorithm>
- #include <iostream>
- #include <map>
- #include <string>
- #include <vector>
- using namespace std;
- // Consider std::pair instead on std::vector<std::string>.
- // Consider std::unordered_map instead of std::map if you have access to it.
- // Somme people would rather see this whole thing wrapped in a class with a
- // overload on operator(). The reasoning being that the constructor of the
- // map runs before main() which hurts program start time.
- map< vector< string >, int > lev_cache;
- // There is a perf argument for passing in 4 string iterators. But that will
- // make the code somewhat less obvious. Also it will make it so the cache is
- // only useful for a single top level call to levenshtein_distance.
- int levenshtein_distance(string l1, string l2) {
- // I'm not crazy about this early out. I'm not sure if it's a win for performance.
- if (l1 == l2) return 0;
- if (l1 == "" && l2 != "") return l2.length();
- if (l1 != "" && l2 == "") return l1.length();
- // You're looking for the element twice in the case it's found, consider instead.
- // std::map<std::pair<std::string, std::string>, int>::iterator it = lev_cache.find(std::make_pair(l1, l2));
- // if (it != lev_cache.end())
- // return *it;
- vector< string > temp; temp.push_back(l1); temp.push_back(l2); //vector< string > temp = {l1, l2};
- if (lev_cache.count(temp) > 0) return lev_cache[temp];
- // Consider combining these and making them const where apropriate.
- // Consider making ll_1 and ll_2 chars.
- // const char l1_1 = l1[0];
- // const std::string l1_rest = l1.substr(1);
- // const char l2_1 = l2[0];
- // const std::string l2_rest = l2.substr(2);
- string l1_1, l1_rest, l2_1, l2_rest;
- l1_1 = l1.substr(0,1);
- l1_rest = l1.substr(1);
- l2_1 = l2.substr(0,1);
- l2_rest = l2.substr(1);
- if (l2_1 == l1_1) {
- int d = levenshtein_distance(l1_rest, l2_rest);
- // Looks like you're putting the wrong thing into the memorization table. _rest, _rest should be in there from the recursive call.
- // lev_cache[std::make_pair(l1, l2)] = d;
- temp.clear(); temp.push_back(l1_rest); temp.push_back(l2_rest); //temp = {l1_rest, l2_rest};
- lev_cache[temp] = d;
- return d;
- }
- // Again it looks like you're putting the result of the recursive call into the table. It's simpler to just put the result into the table.
- int m1 = levenshtein_distance(l1_rest, l2);
- temp.clear(); temp.push_back(l1_rest); temp.push_back(l2); //temp = {l1_rest, l2};
- lev_cache[temp] = m1;
- int m2 = levenshtein_distance(l1, l2_rest);
- temp.clear(); temp.push_back(l1); temp.push_back(l2_rest); //temp = {l1, l2_rest};
- lev_cache[temp] = m2;
- int m3 = levenshtein_distance(l1_rest, l2_rest);
- temp.clear(); temp.push_back(l1_rest); temp.push_back(l2_rest); //temp = {l1_rest, l2_rest};
- lev_cache[temp] = m3;
- return 1 + min(min(m1, m2), m3);
- }
- int main(int argc, char **argv)
- {
- cout << "Distance: " << levenshtein_distance("the quick brown fox jumps over the lazy dog", "pack my box with five dozen liquor jugs") << "\n";
- //cout << "Distance: " << levenshtein_distance("the quick brown fox", "pack my box") << "\n";
- // return is technically not required for main (it's a special case), but I'd throw in return 0;
- }
- #if 0
- #include <iostream>
- #include <string>
- #include <unordered_map>
- #include <utility>
- namespace std {
- template<>
- struct hash<std::pair<std::string, std::string> > {
- size_t operator()(const std::pair<std::string, std::string> &p) const {
- return std::hash<std::string>()(p.first) ^ std::hash<std::string>()(p.second);
- }
- };
- }
- std::unordered_map<std::pair<std::string, std::string>, int> lev_cache;
- int levenshtein_distance(std::string l1, std::string l2) {
- if (l1.empty())
- return l2.length();
- if (l2.empty())
- return l1.length();
- std::unordered_map<std::pair<std::string, std::string>, int>::iterator it = lev_cache.find(std::make_pair(l1, l2));
- if (it != lev_cache.end())
- return it->second;
- const char l1_1 = l1[0];
- const std::string l1_rest = l1.substr(1);
- const char l2_1 = l2[0];
- const std::string l2_rest = l2.substr(1);
- if (l2_1 == l1_1) {
- const int d = levenshtein_distance(l1_rest, l2_rest);
- lev_cache[std::make_pair(l1, l2)] = d;
- return d;
- }
- const int d = 1 + std::min(std::min(levenshtein_distance(l1_rest, l2),
- levenshtein_distance(l1, l2_rest)),
- levenshtein_distance(l1_rest, l2_rest));
- lev_cache[std::make_pair(l1, l2)] = d;
- return d;
- }
- int main(int argc, char **argv)
- {
- std::cout << "Distance: " << levenshtein_distance("the quick brown fox jumps over the lazy dog", "pack my box with five dozen liquor jugs") << "\n";
- return 0;
- }
- #endif
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement