SHOW:
|
|
- or go back to the newest paste.
1 | // I think <algorithm> is unused. | |
2 | #include <algorithm> | |
3 | #include <iostream> | |
4 | #include <map> | |
5 | #include <string> | |
6 | #include <vector> | |
7 | ||
8 | using namespace std; | |
9 | ||
10 | // Consider std::pair instead on std::vector<std::string>. | |
11 | // Consider std::unordered_map instead of std::map if you have access to it. | |
12 | // Somme people would rather see this whole thing wrapped in a class with a | |
13 | // overload on operator(). The reasoning being that the constructor of the | |
14 | // map runs before main() which hurts program start time. | |
15 | map< vector< string >, int > lev_cache; | |
16 | ||
17 | // There is a perf argument for passing in 4 string iterators. But that will | |
18 | // make the code somewhat less obvious. Also it will make it so the cache is | |
19 | // only useful for a single top level call to levenshtein_distance. | |
20 | int levenshtein_distance(string l1, string l2) { | |
21 | // I'm not crazy about this early out. I'm not sure if it's a win for performance. | |
22 | if (l1 == l2) return 0; | |
23 | if (l1 == "" && l2 != "") return l2.length(); | |
24 | if (l1 != "" && l2 == "") return l1.length(); | |
25 | ||
26 | // You're looking for the element twice in the case it's found, consider instead. | |
27 | // std::map<std::pair<std::string, std::string>, int>::iterator it = lev_cache.find(std::make_pair(l1, l2)); | |
28 | // if (it != lev_cache.end()) | |
29 | // return *it; | |
30 | vector< string > temp; temp.push_back(l1); temp.push_back(l2); //vector< string > temp = {l1, l2}; | |
31 | if (lev_cache.count(temp) > 0) return lev_cache[temp]; | |
32 | ||
33 | // Consider combining these and making them const where apropriate. | |
34 | // Consider making ll_1 and ll_2 chars. | |
35 | // const char l1_1 = l1[0]; | |
36 | // const std::string l1_rest = l1.substr(1); | |
37 | // const char l2_1 = l2[0]; | |
38 | // const std::string l2_rest = l2.substr(2); | |
39 | string l1_1, l1_rest, l2_1, l2_rest; | |
40 | l1_1 = l1.substr(0,1); | |
41 | l1_rest = l1.substr(1); | |
42 | l2_1 = l2.substr(0,1); | |
43 | l2_rest = l2.substr(1); | |
44 | if (l2_1 == l1_1) { | |
45 | int d = levenshtein_distance(l1_rest, l2_rest); | |
46 | - | } |
46 | + | // Looks like you're putting the wrong thing into the memorization table. _rest, _rest should be in there from the recursive call. |
47 | // lev_cache[std::make_pair(l1, l2)] = d; | |
48 | temp.clear(); temp.push_back(l1_rest); temp.push_back(l2_rest); //temp = {l1_rest, l2_rest}; | |
49 | lev_cache[temp] = d; | |
50 | return d; | |
51 | } | |
52 | // Again it looks like you're putting the result of the recursive call into the table. It's simpler to just put the result into the table. | |
53 | int m1 = levenshtein_distance(l1_rest, l2); | |
54 | temp.clear(); temp.push_back(l1_rest); temp.push_back(l2); //temp = {l1_rest, l2}; | |
55 | lev_cache[temp] = m1; | |
56 | int m2 = levenshtein_distance(l1, l2_rest); | |
57 | temp.clear(); temp.push_back(l1); temp.push_back(l2_rest); //temp = {l1, l2_rest}; | |
58 | lev_cache[temp] = m2; | |
59 | int m3 = levenshtein_distance(l1_rest, l2_rest); | |
60 | temp.clear(); temp.push_back(l1_rest); temp.push_back(l2_rest); //temp = {l1_rest, l2_rest}; | |
61 | lev_cache[temp] = m3; | |
62 | return 1 + min(min(m1, m2), m3); | |
63 | } | |
64 | ||
65 | int main(int argc, char **argv) | |
66 | { | |
67 | cout << "Distance: " << levenshtein_distance("the quick brown fox jumps over the lazy dog", "pack my box with five dozen liquor jugs") << "\n"; | |
68 | //cout << "Distance: " << levenshtein_distance("the quick brown fox", "pack my box") << "\n"; | |
69 | ||
70 | // return is technically not required for main (it's a special case), but I'd throw in return 0; | |
71 | } | |
72 | ||
73 | #if 0 | |
74 | ||
75 | #include <iostream> | |
76 | #include <string> | |
77 | #include <unordered_map> | |
78 | #include <utility> | |
79 | ||
80 | namespace std { | |
81 | template<> | |
82 | struct hash<std::pair<std::string, std::string> > { | |
83 | size_t operator()(const std::pair<std::string, std::string> &p) const { | |
84 | return std::hash<std::string>()(p.first) ^ std::hash<std::string>()(p.second); | |
85 | } | |
86 | }; | |
87 | } | |
88 | ||
89 | std::unordered_map<std::pair<std::string, std::string>, int> lev_cache; | |
90 | ||
91 | int levenshtein_distance(std::string l1, std::string l2) { | |
92 | if (l1.empty()) | |
93 | return l2.length(); | |
94 | if (l2.empty()) | |
95 | return l1.length(); | |
96 | ||
97 | std::unordered_map<std::pair<std::string, std::string>, int>::iterator it = lev_cache.find(std::make_pair(l1, l2)); | |
98 | if (it != lev_cache.end()) | |
99 | return it->second; | |
100 | ||
101 | const char l1_1 = l1[0]; | |
102 | const std::string l1_rest = l1.substr(1); | |
103 | const char l2_1 = l2[0]; | |
104 | const std::string l2_rest = l2.substr(1); | |
105 | if (l2_1 == l1_1) { | |
106 | const int d = levenshtein_distance(l1_rest, l2_rest); | |
107 | lev_cache[std::make_pair(l1, l2)] = d; | |
108 | return d; | |
109 | } | |
110 | ||
111 | const int d = 1 + std::min(std::min(levenshtein_distance(l1_rest, l2), | |
112 | levenshtein_distance(l1, l2_rest)), | |
113 | levenshtein_distance(l1_rest, l2_rest)); | |
114 | lev_cache[std::make_pair(l1, l2)] = d; | |
115 | return d; | |
116 | } | |
117 | ||
118 | int main(int argc, char **argv) | |
119 | { | |
120 | std::cout << "Distance: " << levenshtein_distance("the quick brown fox jumps over the lazy dog", "pack my box with five dozen liquor jugs") << "\n"; | |
121 | ||
122 | return 0; | |
123 | } | |
124 | ||
125 | #endif |