View difference between Paste ID: R2ntEYYC and spKN8aNX
SHOW: | | - or go back to the newest paste.
1
// I think <algorithm> is unused.
2
#include <algorithm>
3
#include <iostream>
4
#include <map>
5
#include <string>
6
#include <vector>
7
8
using namespace std;
9
10
// Consider std::pair instead on std::vector<std::string>.
11
// Consider std::unordered_map instead of std::map if you have access to it.
12
// Somme people would rather see this whole thing wrapped in a class with a
13
// overload on operator().  The reasoning being that the constructor of the
14
// map runs before main() which hurts program start time.
15
map< vector< string >, int > lev_cache;
16
17
// There is a perf argument for passing in 4 string iterators.  But that will
18
// make the code somewhat less obvious.  Also it will make it so the cache is
19
// only useful for a single top level call to levenshtein_distance.
20
int levenshtein_distance(string l1, string l2) {
21
	// I'm not crazy about this early out.  I'm not sure if it's a win for performance.
22
	if (l1 == l2) return 0;
23
	if (l1 == "" && l2 != "") return l2.length();
24
	if (l1 != "" && l2 == "") return l1.length();
25
	
26
	// You're looking for the element twice in the case it's found, consider instead.
27
	// std::map<std::pair<std::string, std::string>, int>::iterator it = lev_cache.find(std::make_pair(l1, l2));
28
	// if (it != lev_cache.end())
29
	// 	return *it;
30
	vector< string > temp; temp.push_back(l1); temp.push_back(l2); //vector< string > temp = {l1, l2};
31
	if (lev_cache.count(temp) > 0) return lev_cache[temp];
32
	
33
	// Consider combining these and making them const where apropriate.
34
	// Consider making ll_1 and ll_2 chars.
35
	// const char l1_1 = l1[0];
36
	// const std::string l1_rest = l1.substr(1);
37
	// const char l2_1 = l2[0];
38
	// const std::string l2_rest = l2.substr(2);
39
	string l1_1, l1_rest, l2_1, l2_rest;
40
	l1_1 = l1.substr(0,1);
41
	l1_rest = l1.substr(1);
42
	l2_1 = l2.substr(0,1);
43
	l2_rest = l2.substr(1);
44
	if (l2_1 == l1_1) {
45
			int d = levenshtein_distance(l1_rest, l2_rest);
46-
}
46+
			// Looks like you're putting the wrong thing into the memorization table.  _rest, _rest should be in there from the recursive call.
47
			// lev_cache[std::make_pair(l1, l2)] = d;
48
			temp.clear(); temp.push_back(l1_rest); temp.push_back(l2_rest); //temp = {l1_rest, l2_rest};
49
			lev_cache[temp] = d;
50
			return d;
51
	}
52
	// Again it looks like you're putting the result of the recursive call into the table.  It's simpler to just put the result into the table.
53
	int m1 = levenshtein_distance(l1_rest, l2);
54
	temp.clear(); temp.push_back(l1_rest); temp.push_back(l2); //temp = {l1_rest, l2};
55
	lev_cache[temp] = m1;
56
	int m2 = levenshtein_distance(l1, l2_rest);
57
	temp.clear(); temp.push_back(l1); temp.push_back(l2_rest); //temp = {l1, l2_rest};
58
	lev_cache[temp] = m2;
59
	int m3 = levenshtein_distance(l1_rest, l2_rest);
60
	temp.clear(); temp.push_back(l1_rest); temp.push_back(l2_rest); //temp = {l1_rest, l2_rest};
61
	lev_cache[temp] = m3;
62
	return 1 + min(min(m1, m2), m3);
63
}
64
65
int main(int argc, char **argv)
66
{
67
	cout << "Distance: " << levenshtein_distance("the quick brown fox jumps over the lazy dog", "pack my box with five dozen liquor jugs") << "\n";
68
	//cout << "Distance: " << levenshtein_distance("the quick brown fox", "pack my box") << "\n";
69
70
	// return is technically not required for main (it's a special case), but I'd throw in return 0;
71
}
72
73
#if 0
74
75
#include <iostream>
76
#include <string>
77
#include <unordered_map>
78
#include <utility>
79
80
namespace std {
81
	template<>
82
	struct hash<std::pair<std::string, std::string> > {
83
		size_t operator()(const std::pair<std::string, std::string> &p) const {
84
			return std::hash<std::string>()(p.first) ^ std::hash<std::string>()(p.second);
85
		}
86
	};
87
}
88
89
std::unordered_map<std::pair<std::string, std::string>, int> lev_cache;
90
91
int levenshtein_distance(std::string l1, std::string l2) {
92
	if (l1.empty())
93
		return l2.length();
94
	if (l2.empty())
95
		return l1.length();
96
97
	std::unordered_map<std::pair<std::string, std::string>, int>::iterator it = lev_cache.find(std::make_pair(l1, l2));
98
	if (it != lev_cache.end())
99
		return it->second;
100
101
	const char l1_1 = l1[0];
102
	const std::string l1_rest = l1.substr(1);
103
	const char l2_1 = l2[0];
104
	const std::string l2_rest = l2.substr(1);
105
	if (l2_1 == l1_1) {
106
			const int d = levenshtein_distance(l1_rest, l2_rest);
107
			lev_cache[std::make_pair(l1, l2)] = d;
108
			return d;
109
	}
110
111
	const int d = 1 + std::min(std::min(levenshtein_distance(l1_rest, l2),
112
					    levenshtein_distance(l1, l2_rest)),
113
				   levenshtein_distance(l1_rest, l2_rest));
114
	lev_cache[std::make_pair(l1, l2)] = d;
115
	return d;
116
}
117
118
int main(int argc, char **argv)
119
{
120
	std::cout << "Distance: " << levenshtein_distance("the quick brown fox jumps over the lazy dog", "pack my box with five dozen liquor jugs") << "\n";
121
122
	return 0;
123
}
124
125
#endif