running

Word Ladder, O(N*M*M*A Complexity)

Feb 16th, 2013
177
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 8.77 KB | None | 0 0
  1. /*
  2.  
  3. http://leetcode.com/onlinejudge#question_127
  4.  
  5. Word Ladder
  6. Given two words (start and end), and a dictionary, find the length of shortest transformation sequence from start to end, such that:
  7.  
  8. Only one letter can be changed at a time
  9. Each intermediate word must exist in the dictionary
  10. For example,
  11.  
  12. Given:
  13. start = "hit"
  14. end = "cog"
  15. dict = ["hot","dot","dog","lot","log"]
  16. As one shortest transformation is "hit" -> "hot" -> "dot" -> "dog" -> "cog",
  17. return its length 5.
  18.  
  19. Note:
  20. Return 0 if there is no such transformation sequence
  21. */
  22.  
  23. /*
  24. Given N words in dictionary and M word length and alphabet size A,
  25. This solution has the complexity of O(N*M*M*A).
  26.  
  27. Take for example, N=3000, M = 4, A = 25
  28. N * M * M * A = 1,200,000
  29.  
  30. For every word W in dictionary D
  31. we compute M partial hashes PH,
  32. where i-th partial hash is a hash computed from word W with i character skipped.
To compute M partial hashes we must iterate M times over word W of length M,
hence the complexity of the partial hash computation is O(M*M)
  35.  
  36. Then we link word W into bags of words with same partial hashes.
  37. Therefore in each bag B there are words that might differ by only one character
  38. which was skipped during partial hash computation.
  39.  
For each bag of words B we connect all words in this bag into graph G.
Hence a possible quadratic complexity O(M*A),
because each bag might contain up to A words
and there are M possible positions of skipped characters
  44.  
After all possible words are connected, we use BFS to find the min path from start to end

The solution can be improved if we compute partial hashes with linear complexity O(M) instead of O(M*M)
  48. */
  49. #include "stdafx.h"
  50. #include <iostream>
  51. #include <utility>
  52. #include <unordered_set>
  53. #include <unordered_map>
  54. #include <string>
  55. #include <vector>
  56. #include <deque>
  57. #include <functional>
  58. #include <stdint.h>
  59. using namespace std;
  60.  
  61. typedef unordered_set<string> dict_t;
  62.  
  63. typedef size_t hash_t;
  64. typedef pair<hash_t, int> partial_hash_t; // hash computed with ith character skipped
  65. typedef unordered_set<int> word_bucket_t; // a set of word indices with the same partial hash
  66.  
  67. template<typename T>
  68. inline void hash_combine(hash_t & H, const T v)
  69. {
  70.     std::hash<T> hash_fn; // standard hash function
  71.     H ^= hash_fn(v) + 0x9e3779b9 + (H<<6) + (H>>2);
  72. }
  73.  
  74. // hash function for partial hash
  75. class phash_fn {
  76. public:
  77.     size_t operator ()(const partial_hash_t &v) const
  78.     {
  79.         hash_t H = 0;
  80.         hash_combine(H, v.first);
  81.         hash_combine(H, v.second);
  82.         return H;
  83.     }
  84. };
  85.  
  86. class phash_eq{
  87. public:
  88.     bool operator ()(const partial_hash_t &a, const partial_hash_t &b) const
  89.     {
  90.         return a.first == b.first && a.second == b.second;
  91.     }
  92. };
  93.  
  94. // lets put words with same partial hashes into same word bucket
  95. typedef unordered_map<partial_hash_t, word_bucket_t, phash_fn, phash_eq> bucket_table_t;
  96.  
  97. typedef uint16_t vertex_t;
  98.  
  99. typedef unordered_map<int, word_bucket_t> graph_t;
  100.  
  101. inline void connect(graph_t & G, vertex_t i, vertex_t j)
  102. {
  103.     G[i].insert(j);
  104.     G[j].insert(i);
  105. }
  106.  
  107. hash_t str_hash(const string & s, const size_t len)
  108. {
  109.     hash_t H = 0;
  110.     for(size_t i = 0; i < len; ++i)
  111.     {
  112.         hash_combine(H, s[i]);
  113.     }
  114.     return H;
  115. }
  116.  
  117.  
  118. hash_t str_hash(const string & s, size_t skip_pos, const size_t len)
  119. {
  120.     hash_t H = 0;
  121.     for(size_t i =0; i < len; ++i)
  122.     {
  123.         if(i != skip_pos)
  124.         {
  125.             hash_combine(H, s[i]);
  126.         }
  127.     }
  128.     return H;
  129. }
  130.  
// Word Ladder solver.
//
// The dictionary is pre-processed into buckets of words that share a
// partial hash (hash with one character position skipped).  Words in the
// same bucket agree on every character except (possibly) the skipped one
// — modulo hash collisions — so graph edges only need to be considered
// inside a bucket.  A plain BFS over the resulting graph then yields the
// shortest transformation length.
class Solution {
public:
    // word_vec[i] holds the i-th dictionary word, including the
    // sentinel-suffixed copies of 'start' and 'end' inserted by
    // ladderLength.
    vector<string> word_vec;


    // Fills `hashes` with the word_len partial hashes of `word`:
    // hashes[hash_no] is the hash of `word` with character #hash_no
    // skipped.  O(word_len^2): each of the word_len hashes rescans the
    // whole word.
    void compute_hashes(vector<hash_t>& hashes,
        const string & word, const size_t word_len)
    {
        // Size the output buffer.  NOTE(review): resize() value-initializes
        // only NEW elements; slots surviving from a previous call keep
        // stale values, which is harmless because the loop below
        // overwrites every slot.
        hashes.resize(word_len, 0);

        for(size_t hash_no = 0; hash_no < word_len; ++hash_no)
        {
            // skip character #hash_no when computing hashes[hash_no]
            hashes[hash_no] = str_hash(word, hash_no, word_len);
        }
    }

    // Connects every pair of words that share a bucket (i.e. have equal
    // partial hashes for some skipped position) into graph G, unless
    // their FULL hashes are equal.  The equal-full-hash test keeps the
    // sentinel copy of 'start'/'end' (see ladderLength) from being
    // linked to the identical dictionary word.
    // NOTE(review): hash equality is used as a proxy for string
    // equality; a hash collision could create a false edge or suppress
    // a real one.
    void connect_graph(graph_t & G, bucket_table_t & wordbuckets,
        const vector<hash_t> & full_hashes)
    {
        // Examine every bucket; only pairs inside one bucket can be
        // one-character neighbours.
        for(bucket_table_t::iterator it = wordbuckets.begin(); it != wordbuckets.end(); ++it)
        {
            word_bucket_t & bucket = it->second;
            word_bucket_t::iterator w1, w2, e = bucket.end();

            // All unordered pairs (w1, w2) within the bucket.
            for(w1 = bucket.begin(); w1 != e; ++w1)
            {
                for(w2 = w1; w2 != e; ++w2)
                {
                    if(w1 == w2) { continue; }

                    const size_t word1_pos = *w1, word2_pos = *w2;

                    // Skip pairs whose full hashes match: this covers a
                    // word paired with itself and, when 'start'/'end'
                    // already occur in dict, their sentinel copies
                    // (which share the full hash of the original word).
                    if(word1_pos == word2_pos || full_hashes[word1_pos] == full_hashes[word2_pos] )
                    {
                        continue;
                    }
                    connect(G, word1_pos, word2_pos);
                }
            }
        }
    }

    // Returns the number of words on the shortest transformation
    // sequence from `start` to `end` (endpoints included), or 0 when no
    // such sequence exists.
    // NOTE(review): mutates the caller's `dict` by inserting the
    // sentinel-suffixed copies of start and end.
    int ladderLength(string start, string end, dict_t &dict)
    {  
        // All words are assumed to have the same length as `start`.
        const size_t WORD_LEN = start.size();

        bool start_in_dict = (dict.find(start) != dict.end());
        //bool end_in_dict = (dict.find(end) != dict.end());

        // Append sentinel characters so that start and end become
        // distinct set keys even when they already occur in dict.  All
        // hashing below reads only the first WORD_LEN characters, so
        // the sentinels never influence any hash; they only make the
        // strings unique.  (A sentinel copy therefore shares its FULL
        // hash with the matching dict word, and connect_graph's
        // equal-full-hash check keeps those two from being linked.)
        start += '^';
        end += '$';

        dict.insert(start);
        dict.insert(end);

        const size_t NUM_WORDS = dict.size();

        word_vec.resize(NUM_WORDS);

        // "Not found" sentinel for the start/end word indices.
        // NOTE(review): INT_MAX in a size_t assumes NUM_WORDS < INT_MAX
        // and relies on <climits> being pulled in via stdafx.h — confirm.
        size_t start_pos = INT_MAX, end_pos = INT_MAX;

        vector<hash_t> full_hashes(NUM_WORDS);

        bucket_table_t wordbuckets;
        vector<hash_t> partial_hashes; // reused scratch buffer per word

        // Index every dictionary word into its partial-hash buckets.
        dict_t::iterator w = dict.begin();
        for(size_t word_no = 0; w != dict.end(); ++w, ++word_no)
        {
            const string & word = *w;

            // Full hash over the first WORD_LEN characters only.
            hash_t full_hash = str_hash(word, WORD_LEN);

            // compute partial hashes for all partial strings;
            // the i-th partial string is the original with char #i skipped
            compute_hashes(partial_hashes, word, WORD_LEN);

            // File this word under each (partial hash, skipped position)
            // key; one-character neighbours share such a key.
            for(size_t hash_no = 0; hash_no < WORD_LEN; ++hash_no)
            {
                const hash_t & H = partial_hashes[hash_no];
                wordbuckets[make_pair(H, hash_no)].insert(word_no);
            }


            // Remember where the (sentinel-suffixed) start/end landed.
            if(start_pos == INT_MAX && word == start) { start_pos = word_no; }
            if(end_pos == INT_MAX && word == end) { end_pos = word_no; }

            word_vec[word_no] = word;
            full_hashes[word_no] = full_hash;
        }

        graph_t G;

        connect_graph(G, wordbuckets, full_hashes);

        graph_t::iterator g;

        if(!start_in_dict)
        {
            g = G.find(start_pos);
            if(g != G.end())
            {  
                word_bucket_t & siblings = g->second;

                // Remove the direct start->end edge if present: by the
                // problem definition the path must pass through the
                // dictionary.  NOTE(review): only the start->end
                // direction is erased; the reverse edge stays, which is
                // sufficient because BFS expands from start.
                word_bucket_t::iterator s = siblings.find(end_pos);
                if(s != siblings.end()) { siblings.erase(s); }

                if(siblings.empty()) { return 0; }
            }
            else { return 0; } // no connections from 'start' to any node in the graph? exit
        }

        // no connections from 'end' to any node in the graph? exit
        g = G.find(end_pos);
        if( g == G.end() || g->second.empty()) { return 0; }


        vector<string> path; // NOTE(review): never filled; bfs ignores it
        return bfs(G, start_pos, end_pos, path, WORD_LEN);
    }

    // BFS queue entry: a word vertex plus its 1-based distance from the
    // start vertex (depth 1 == the start word itself).
    struct node
    {
        int vertex;
        int depth;
        node(int v, int d = 1)
            : vertex(v), depth(d)
        {}
    };

    // Breadth-first search from vertex_start to vertex_end; returns the
    // depth of vertex_end (number of words on the shortest path), or 0
    // when it is unreachable.  `path` and `word_len` are currently
    // unused.
    int bfs(graph_t & G, int vertex_start, int vertex_end, vector<string> & path, int word_len)
    {
        std::deque<node> Q;
        // NOTE(review): keyed by vertex_t (uint16_t as declared above)
        // while vertices are int — indices above 65535 would be
        // truncated; confirm intended dictionary sizes.
        std::unordered_set<vertex_t> visited;
        Q.push_back(node(vertex_start));

        while(!Q.empty())
        {
            node current = Q.front();                    
            Q.pop_front();

            // Target reached: its depth is the ladder length.
            if(current.vertex == vertex_end) {return current.depth;}

            // Nodes may be enqueued multiple times; visit only once.
            if(visited.find(current.vertex) != visited.end()) { continue; }

            visited.insert(current.vertex);

            graph_t::iterator connected = G.find(current.vertex);
            if(connected == G.end()) { continue; } // no connected words found, skip

            // NOTE(review): S is unused — candidate for removal.
            const string & S = word_vec[current.vertex];

            word_bucket_t & siblings = connected->second;
            word_bucket_t::iterator sibling = siblings.begin(), end = siblings.end();

            // Enqueue all neighbours one level deeper.
            for(; sibling != end; ++sibling)
            {
                Q.push_back(node(*sibling, current.depth + 1));
            }
        }
        return 0; // end vertex never reached
    }
};
  305.  
  306.  
  307. #define TEST 5
  308.  
  309. int _tmain(int argc, _TCHAR* argv[])
  310. {
  311.     dict_t words;
  312.     int ret;
  313.  
  314. #if TEST==1
  315.     words.insert("hot");
  316.     words.insert("dot");
  317.     words.insert("dog");
  318.     words.insert("lot");
  319.     words.insert("log");
  320.     ret = Solution().ladderLength("hit", "cog", words);
  321. #elif TEST==2
  322.     words.insert("hot");
  323.     ret = Solution().ladderLength("hot", "hot", words);
  324. #elif TEST==3
  325.     words.insert("dot");
  326.     words.insert("hot");
  327.     ret = Solution().ladderLength("hot", "hot", words);
  328.  
  329. #elif TEST==4
  330.     // ["talk","tons","fall","tail","gale","hall","negs"]
  331.     words.insert("talk");
  332.     words.insert("tons");
  333.     words.insert("fall");
  334.     words.insert("tail");
  335.     words.insert("gale");
  336.     words.insert("hall");
  337.     words.insert("negs");
  338.     ret = Solution().ladderLength("talk", "tail", words);
  339. #elif TEST==5
  340.     words.insert("a");
  341.     words.insert("b");
  342.     words.insert("c");
  343.     ret = Solution().ladderLength("a", "c", words);
  344. #endif
  345.  
  346.     std::cout << ret;
  347.  
  348.  
  349.     return 0;
  350. }
Advertisement
Add Comment
Please, Sign In to add comment