Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/*
http://leetcode.com/onlinejudge#question_127
Word Ladder
Given two words (start and end) and a dictionary, find the length of the shortest
transformation sequence from start to end, such that:
- Only one letter can be changed at a time
- Each intermediate word must exist in the dictionary
For example,
Given:
start = "hit"
end = "cog"
dict = ["hot","dot","dog","lot","log"]
One shortest transformation is "hit" -> "hot" -> "dot" -> "dog" -> "cog",
so return its length, 5.
Note:
Return 0 if there is no such transformation sequence.
*/
/*
Given N words in the dictionary, word length M, and alphabet size A,
this solution has complexity O(N*M*M*A).
Take for example N = 3000, M = 4, A = 25:
N * M * M * A = 1,200,000.
For every word W in dictionary D
we compute M partial hashes PH,
where the i-th partial hash is computed from word W with the i-th character skipped.
To compute M partial hashes we must iterate M times over word W of length M,
hence the complexity of the partial-hash computation is O(M*M).
Then we link word W into buckets of words with the same partial hashes.
Therefore each bucket B holds only words that might differ by a single character --
the one skipped during partial-hash computation.
For each bucket of words B we connect all words in that bucket into graph G.
This costs O(M*A) per word,
because each bucket may contain up to A words
and there are M possible positions for the skipped character.
After all candidate words are connected, we use BFS to find the min path from start to end.
The solution could be improved by computing the partial hashes with linear complexity O(M) instead of O(M*M).
*/
- #include "stdafx.h"
- #include <iostream>
- #include <utility>
- #include <unordered_set>
- #include <unordered_map>
- #include <string>
- #include <vector>
- #include <deque>
- #include <functional>
- #include <stdint.h>
- using namespace std;
// Core type aliases (std::-qualified so they do not depend on a
// using-directive being in effect).
typedef std::unordered_set<std::string> dict_t;   // the word dictionary
typedef std::size_t hash_t;                       // raw hash value
typedef std::pair<hash_t, int> partial_hash_t;    // (hash, skipped position): hash of a word with one character left out
typedef std::unordered_set<int> word_bucket_t;    // indices of words sharing a partial hash

// Fold value `v` into the running hash `H` (the classic boost::hash_combine
// mixing recipe with the golden-ratio constant 0x9e3779b9).
template<typename V>
inline void hash_combine(hash_t & H, const V v)
{
    std::hash<V> hash_fn; // standard hash function for V
    H ^= hash_fn(v) + 0x9e3779b9 + (H << 6) + (H >> 2);
}
- // hash function for partial hash
- class phash_fn {
- public:
- size_t operator ()(const partial_hash_t &v) const
- {
- hash_t H = 0;
- hash_combine(H, v.first);
- hash_combine(H, v.second);
- return H;
- }
- };
- class phash_eq{
- public:
- bool operator ()(const partial_hash_t &a, const partial_hash_t &b) const
- {
- return a.first == b.first && a.second == b.second;
- }
- };
- // lets put words with same partial hashes into same word bucket
- typedef unordered_map<partial_hash_t, word_bucket_t, phash_fn, phash_eq> bucket_table_t;
- typedef uint16_t vertex_t;
- typedef unordered_map<int, word_bucket_t> graph_t;
- inline void connect(graph_t & G, vertex_t i, vertex_t j)
- {
- G[i].insert(j);
- G[j].insert(i);
- }
- hash_t str_hash(const string & s, const size_t len)
- {
- hash_t H = 0;
- for(size_t i = 0; i < len; ++i)
- {
- hash_combine(H, s[i]);
- }
- return H;
- }
- hash_t str_hash(const string & s, size_t skip_pos, const size_t len)
- {
- hash_t H = 0;
- for(size_t i =0; i < len; ++i)
- {
- if(i != skip_pos)
- {
- hash_combine(H, s[i]);
- }
- }
- return H;
- }
- class Solution {
- public:
- vector<string> word_vec;
- void compute_hashes(vector<hash_t>& hashes,
- const string & word, const size_t word_len)
- {
- // reset hashes for the current word
- hashes.resize(word_len, 0);
- for(size_t hash_no = 0; hash_no < word_len; ++hash_no)
- {
- // skip character #hash_no when updating hash[hash_no]
- hashes[hash_no] = str_hash(word, hash_no, word_len);
- }
- }
- void connect_graph(graph_t & G, bucket_table_t & wordbuckets,
- const vector<hash_t> & full_hashes)
- {
- // check all the bucket within the same bucket
- for(bucket_table_t::iterator it = wordbuckets.begin(); it != wordbuckets.end(); ++it)
- {
- word_bucket_t & bucket = it->second;
- word_bucket_t::iterator w1, w2, e = bucket.end();
- for(w1 = bucket.begin(); w1 != e; ++w1)
- {
- for(w2 = w1; w2 != e; ++w2)
- {
- if(w1 == w2) { continue; }
- const size_t word1_pos = *w1, word2_pos = *w2;
- // don't connect word with itself (when 'start' matches
- if(word1_pos == word2_pos || full_hashes[word1_pos] == full_hashes[word2_pos] )
- {
- continue;
- }
- connect(G, word1_pos, word2_pos);
- }
- }
- }
- }
- int ladderLength(string start, string end, dict_t &dict)
- {
- const size_t WORD_LEN = start.size();
- bool start_in_dict = (dict.find(start) != dict.end());
- //bool end_in_dict = (dict.find(end) != dict.end());
- // modify start and end so that
- // their full hashes are different from words in dict
- start += '^';
- end += '$';
- dict.insert(start);
- dict.insert(end);
- const size_t NUM_WORDS = dict.size();
- word_vec.resize(NUM_WORDS);
- size_t start_pos = INT_MAX, end_pos = INT_MAX;
- vector<hash_t> full_hashes(NUM_WORDS);
- bucket_table_t wordbuckets;
- vector<hash_t> partial_hashes;
- // for all the words in the dictionary
- dict_t::iterator w = dict.begin();
- for(size_t word_no = 0; w != dict.end(); ++w, ++word_no)
- {
- const string & word = *w;
- hash_t full_hash = str_hash(word, WORD_LEN);
- // compute partial hashes for all partial strings
- // ith partial string is the original string with ith character skipped
- compute_hashes(partial_hashes, word, WORD_LEN);
- // put all words that are different by only one character
- // into wordbuckets of words
- for(size_t hash_no = 0; hash_no < WORD_LEN; ++hash_no)
- {
- const hash_t & H = partial_hashes[hash_no];
- wordbuckets[make_pair(H, hash_no)].insert(word_no);
- }
- if(start_pos == INT_MAX && word == start) { start_pos = word_no; }
- if(end_pos == INT_MAX && word == end) { end_pos = word_no; }
- word_vec[word_no] = word;
- full_hashes[word_no] = full_hash;
- }
- graph_t G;
- connect_graph(G, wordbuckets, full_hashes);
- graph_t::iterator g;
- if(!start_in_dict)
- {
- g = G.find(start_pos);
- if(g != G.end())
- {
- word_bucket_t & siblings = g->second;
- // remove connection from start_pos to end_pos if they are connected
- // by problem definition path must go throught the graph
- word_bucket_t::iterator s = siblings.find(end_pos);
- if(s != siblings.end()) { siblings.erase(s); }
- if(siblings.empty()) { return 0; }
- }
- else { return 0; } // no connections from 'start' to any node in the graph? exit
- }
- // no connections from 'end' to any node in the graph? exit
- g = G.find(end_pos);
- if( g == G.end() || g->second.empty()) { return 0; }
- vector<string> path;
- return bfs(G, start_pos, end_pos, path, WORD_LEN);
- }
- struct node
- {
- int vertex;
- int depth;
- node(int v, int d = 1)
- : vertex(v), depth(d)
- {}
- };
- int bfs(graph_t & G, int vertex_start, int vertex_end, vector<string> & path, int word_len)
- {
- std::deque<node> Q;
- std::unordered_set<vertex_t> visited;
- Q.push_back(node(vertex_start));
- while(!Q.empty())
- {
- node current = Q.front();
- Q.pop_front();
- if(current.vertex == vertex_end) {return current.depth;}
- if(visited.find(current.vertex) != visited.end()) { continue; }
- visited.insert(current.vertex);
- graph_t::iterator connected = G.find(current.vertex);
- if(connected == G.end()) { continue; } // no connected words found, skip
- const string & S = word_vec[current.vertex];
- word_bucket_t & siblings = connected->second;
- word_bucket_t::iterator sibling = siblings.begin(), end = siblings.end();
- for(; sibling != end; ++sibling)
- {
- Q.push_back(node(*sibling, current.depth + 1));
- }
- }
- return 0;
- }
- };
- #define TEST 5
- int _tmain(int argc, _TCHAR* argv[])
- {
- dict_t words;
- int ret;
- #if TEST==1
- words.insert("hot");
- words.insert("dot");
- words.insert("dog");
- words.insert("lot");
- words.insert("log");
- ret = Solution().ladderLength("hit", "cog", words);
- #elif TEST==2
- words.insert("hot");
- ret = Solution().ladderLength("hot", "hot", words);
- #elif TEST==3
- words.insert("dot");
- words.insert("hot");
- ret = Solution().ladderLength("hot", "hot", words);
- #elif TEST==4
- // ["talk","tons","fall","tail","gale","hall","negs"]
- words.insert("talk");
- words.insert("tons");
- words.insert("fall");
- words.insert("tail");
- words.insert("gale");
- words.insert("hall");
- words.insert("negs");
- ret = Solution().ladderLength("talk", "tail", words);
- #elif TEST==5
- words.insert("a");
- words.insert("b");
- words.insert("c");
- ret = Solution().ladderLength("a", "c", words);
- #endif
- std::cout << ret;
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment