Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- // Goof trie complexities:
- // n - key length, A - alphabet size
- // md - maximal allowed Levenshtein distance
- // add, erase - O(n * log(A))
- // find - O(n * A ^ md) [closer to O(A ^ md) for real-world data]
- // A structure that represents a node in a goof trie
- template<class T>
- struct gtrie_node
- {
- std::map<char, gtrie_node<T>*> sub_nodes;
- std::optional<T> value;
- ~gtrie_node()
- {
- for (auto& sub_node : sub_nodes)
- delete sub_node.second;
- }
- };
- // Helper structure used to find an apporimxation of gtrie query
- template <class T>
- struct gtrie_snode
- {
- gtrie_node<T>* node;
- int depth, distance;
- gtrie_snode() {}
- gtrie_snode(gtrie_node<T>* node, int depth, int distance)
- {
- this->node = node;
- this->depth = depth;
- this->distance = distance;
- }
- };
- // Compares two gtrie snodes by their distance and then depth
- template <class T>
- struct gtrie_comparator
- {
- bool operator()(const gtrie_snode<T>& l, const gtrie_snode<T>& r)
- {
- if (l.distance == r.distance) return l.depth > r.depth;
- return l.distance > r.distance;
- }
- };
- // Goof trie is a trie-like structure that allows for inaccuracies when searching for a key
- // Using the find function, user can search through the dictionary for a phrase with
- // the smallest Levenshtein distance (in reference to the key string)
- template<class T>
- class gtrie
- {
- private:
- gtrie_node<T> root;
- T default_result;
- public:
- gtrie(T default_result = T())
- {
- this->default_result = default_result;
- }
- // Uses key to build T on the trie
- void add(std::string key, T value)
- {
- gtrie_node<T>* cur = &root;
- for (char c : key)
- {
- if (cur->sub_nodes.find(c) == cur->sub_nodes.end())
- cur->sub_nodes[c] = new gtrie_node<T>();
- cur = cur->sub_nodes[c];
- }
- cur->value = value;
- }
- // Erases the key from the tree, cleaning up empty nodes
- void erase(std::string key)
- {
- erase(key, 0, &root);
- }
- // Recursive erease function
- bool erase(std::string& key, int pos, gtrie_node<T>* node)
- {
- // Hit the end of the tree
- if (pos == key.size())
- {
- node->value = default_result;
- return true;
- }
- // Continue descent if a the key exists on the tree
- if (node->sub_nodes.find(key[pos]) != node->sub_nodes.end())
- {
- // Call subroutine, and consider erasing subnodes if it returned true
- gtrie_node<T>* next = node->sub_nodes[key[pos]];
- if (!erase(key, pos + 1, next))
- return false;
- // There are no other keys extending from the current branch, so delete the node
- if (next->sub_nodes.empty())
- {
- node->sub_nodes.erase(key[pos]);
- delete next;
- return true;
- }
- }
- return false;
- }
- // Returns the best match (with smallest distance) found in the tree
- T find(std::string phrase, int max_distance = 1)
- {
- std::vector<gtrie_snode<T>> res; // Search results
- std::priority_queue<gtrie_snode<T>,
- std::vector<gtrie_snode<T>>,
- gtrie_comparator<T>> PQ; // Nodes priority queue
- // Rotate through all eligible search nodes starting from the root
- PQ.push(gtrie_snode(&root, 0, 0));
- while (!PQ.empty())
- {
- gtrie_snode<T> snode = PQ.top(); PQ.pop(); // Current node
- int distance_left = max_distance - snode.distance; // Current node's left distance
- // If the node is in range of word distance and it is the last letter of gtrie's entry then it's a match
- if ((int)phrase.size() <= snode.depth + distance_left &&
- (int)phrase.size() >= std::max(0, snode.depth - distance_left) &&
- snode.node->value.has_value())
- {
- res.push_back(snode);
- // Break if the match is perfect
- if (snode.distance == 0 && snode.depth == phrase.size())
- break;
- }
- // Continue descent
- else if (snode.depth < phrase.size())
- {
- // Queue eglible subnodes
- for (auto itn : snode.node->sub_nodes)
- {
- if (itn.first == phrase[snode.depth]) PQ.push({ itn.second, snode.depth + 1, snode.distance });
- else if (snode.distance < max_distance) PQ.push({ itn.second, snode.depth + 1, snode.distance + 1 });
- }
- // Skip the current character
- if (snode.distance < max_distance)
- PQ.push({ snode.node, snode.depth + 1, snode.distance + 1 });
- }
- }
- // If the search results aren't empty, assign the return value
- // Return value should have the smallest distance possible
- if (res.empty())
- return default_result;
- int mn = 0;
- for (int i = 1; i < res.size(); i++)
- if (res[i].distance < res[mn].distance)
- mn = i;
- return res[mn].node->value.value();
- }
- };
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement