SHARE
TWEET

gtrie (quick predefined key approximated search)

keverman Nov 25th, 2018 (edited) 149 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. // Goof trie allows to quickly find a string with smallest distance
  2. // in a set of predefined (added) strings
  3.  
  4. // A structure that represents a node in a goof trie
  5. template<class T>
  6. struct gtrie_node
  7. {
  8.     ~gtrie_node()
  9.     {
  10.         for (auto &sub_node : sub_nodes)
  11.             delete sub_node.second;
  12.     }
  13.  
  14.     std::map<char, gtrie_node<T>*> sub_nodes;
  15.     std::optional<T> value;
  16.  
  17.     void assign(T &&new_value)
  18.     {
  19.         value = new_value;
  20.     }
  21. };
  22.  
  23. // Helper structure used to find an apporimxation of gtrie query
  24. template <class T>
  25. struct gtrie_snode
  26. {
  27.     gtrie_node<T> *node;
  28.     int depth, distance;
  29.  
  30.     gtrie_snode() {}
  31.     gtrie_snode(gtrie_node<T> *node, int depth, int distance)
  32.     {
  33.         this->node = node;
  34.         this->depth = depth;
  35.         this->distance = distance;
  36.     }
  37. };
  38.  
  39. // Compares two gtrie snodes by their distance and then depth
  40. template <class T>
  41. struct gtrie_comparator
  42. {
  43.     bool operator()(const gtrie_snode<T> &l, const gtrie_snode<T> &r)
  44.     {
  45.         if (l.distance == r.distance) return l.depth > r.depth;
  46.         return l.distance > r.distance;
  47.     }
  48. };
  49.  
  50.  
  51. template<class T>
  52. class gtrie
  53. {
  54.     private:
  55.  
  56.         gtrie_node<T> root;
  57.  
  58.     public:
  59.  
  60.         gtrie()
  61.         {
  62.             root = *(new gtrie_node<T>);
  63.         }
  64.  
  65.         // Load the tree from a file. Requires T to have a custom load constructor
  66.         // with ifstream
  67.         void load(std::ifstream &in, std::function<std::string(T)> get_phrase)
  68.         {
  69.             while (!in.eof())
  70.             {
  71.                 T tmp(in);
  72.                 add(get_phrase(tmp), std::move(tmp));
  73.             }
  74.         }
  75.  
  76.         // Builds a T value on the tree with phrase identifier
  77.         // Complexity O(n * log(A)), where n is phrase size and A is alphabet size
  78.         void add(std::string phrase, T &&assign_value)
  79.         {
  80.             gtrie_node<T> *cur = &root;
  81.  
  82.             for (char &c : phrase)
  83.             {
  84.                 auto search = cur->sub_nodes.find(c);
  85.  
  86.                 if (search == cur->sub_nodes.end())
  87.                 {
  88.                     cur->sub_nodes[c] = new gtrie_node<T>();
  89.                     cur = cur->sub_nodes[c];
  90.                 }
  91.  
  92.                 else
  93.                     cur = (*search).second;
  94.             }
  95.  
  96.             cur->assign(std::move(assign_value));
  97.         }
  98.        
  99.         // Returns the best match (with smallest distance) found in the tree
  100.         // Complexity: O(s), where s is the quantity of nodes on the tree, however
  101.         // for small max distances it's leaning towards linear towards phrase's length
  102.         std::optional<T> find(std::string phrase, int max_distance = 1)
  103.         {
  104.             std::vector<gtrie_snode<T>> res;    // Search results
  105.             std::priority_queue<gtrie_snode<T>,
  106.                 std::vector<gtrie_snode<T>>,
  107.                 gtrie_comparator<T>> PQ;        // Nodes priority queue
  108.  
  109.             // Initialize the search queue with the original element
  110.             PQ.push(gtrie_snode(&root, 0, 0));
  111.  
  112.             // Rotate through all eligible search nodes
  113.             while (!PQ.empty())
  114.             {
  115.                 gtrie_snode<T> snode = PQ.top();              // Current node
  116.                 PQ.pop();                                     // Pop the current node from PQ
  117.                 int leftDist = max_distance - snode.distance; // Current node's left distance
  118.  
  119.                 // If the node is in range of word distance and it is the last letter of gtrie's entry then it's a match
  120.                 if ((int)phrase.size() <= snode.depth + leftDist &&
  121.                     (int)phrase.size() >= max(0, snode.depth - leftDist) &&
  122.                     snode.node->value.has_value())
  123.                 {
  124.                     res.push_back(snode);
  125.  
  126.                     // If the strings are equal, break the search as a perfect match has been found
  127.                     if (snode.distance == 0 && snode.depth == phrase.size())
  128.                         break;
  129.                 }
  130.  
  131.                 // Queue eglible subnodes
  132.                 for (auto itn : snode.node->sub_nodes)
  133.                 {
  134.                     if (itn.first == phrase[snode.depth])   PQ.push(gtrie_snode<T>(itn.second, snode.depth + 1, snode.distance));
  135.                     else if (snode.distance < max_distance) PQ.push(gtrie_snode<T>(itn.second, snode.depth + 1, snode.distance + 1));
  136.                 }
  137.             }
  138.  
  139.             // If the search results aren't empty, assign the return value
  140.             // Return value should have the smallest distance possible
  141.             if (!res.empty())
  142.             {
  143.                 size_t i = 1, mn = 0;
  144.  
  145.                 for (; i < res.size(); i++)
  146.                 {
  147.                     if (res[i].distance < res[mn].distance)
  148.                         mn = i;
  149.                 }
  150.  
  151.                 return res[mn].node->value;
  152.             }
  153.  
  154.             return std::optional<T>();
  155.         }
  156. };
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top