running

Word Ladder, O(N*M*M*A Complexity)

Feb 16th, 2013
177
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 8.77 KB | None | 0 0
  1. /*
  2.  
  3. http://leetcode.com/onlinejudge#question_127
  4.  
  5. Word Ladder
  6. Given two words (start and end), and a dictionary, find the length of shortest transformation sequence from start to end, such that:
  7.  
  8. Only one letter can be changed at a time
  9. Each intermediate word must exist in the dictionary
  10. For example,
  11.  
  12. Given:
  13. start = "hit"
  14. end = "cog"
  15. dict = ["hot","dot","dog","lot","log"]
  16. As one shortest transformation is "hit" -> "hot" -> "dot" -> "dog" -> "cog",
  17. return its length 5.
  18.  
  19. Note:
  20. Return 0 if there is no such transformation sequence
  21. */
  22.  
  23. /*
  24. Given N words in dictionary and M word length and alphabet size A,
  25. This solution has the complexity of O(N*M*M*A).
  26.  
  27. Take for example, N=3000, M = 4, A = 25
  28. N * M * M * A = 1,200,000
  29.  
  30. For every word W in dictionary D
  31. we compute M partial hashes PH,
  32. where i-th partial hash is a hash computed from word W with i character skipped.
To compute M partial hashes we must iterate M times over word W of length M,
hence the complexity of the partial hash computation is O(M*M)
  35.  
  36. Then we link word W into bags of words with same partial hashes.
  37. Therefore in each bag B there are words that might differ by only one character
  38. which was skipped during partial hash computation.
  39.  
For each bag of words B we connect all words in this bag into graph G.
Hence a possible quadratic complexity O(M*A),
because each bag might contain up to A words
and there are M possible positions of skipped characters
  44.  
After all possible words are connected, we use BFS to find the min path from start to end

The solution can be improved if we compute partial hashes with linear complexity O(M) instead of O(M*M)
  48. */
  49. #include "stdafx.h"
  50. #include <iostream>
  51. #include <utility>
  52. #include <unordered_set>
  53. #include <unordered_map>
  54. #include <string>
  55. #include <vector>
  56. #include <deque>
  57. #include <functional>
  58. #include <stdint.h>
  59. using namespace std;
  60.  
  61. typedef unordered_set<string> dict_t;
  62.  
  63. typedef size_t hash_t;
  64. typedef pair<hash_t, int> partial_hash_t; // hash computed with ith character skipped
  65. typedef unordered_set<int> word_bucket_t; // a set of word indices with the same partial hash
  66.  
  67. template<typename T>
  68. inline void hash_combine(hash_t & H, const T v)
  69. {
  70.     std::hash<T> hash_fn; // standard hash function
  71.     H ^= hash_fn(v) + 0x9e3779b9 + (H<<6) + (H>>2);
  72. }
  73.  
  74. // hash function for partial hash
  75. class phash_fn {
  76. public:
  77.     size_t operator ()(const partial_hash_t &v) const
  78.     {
  79.         hash_t H = 0;
  80.         hash_combine(H, v.first);
  81.         hash_combine(H, v.second);
  82.         return H;
  83.     }
  84. };
  85.  
  86. class phash_eq{
  87. public:
  88.     bool operator ()(const partial_hash_t &a, const partial_hash_t &b) const
  89.     {
  90.         return a.first == b.first && a.second == b.second;
  91.     }
  92. };
  93.  
  94. // lets put words with same partial hashes into same word bucket
  95. typedef unordered_map<partial_hash_t, word_bucket_t, phash_fn, phash_eq> bucket_table_t;
  96.  
  97. typedef uint16_t vertex_t;
  98.  
  99. typedef unordered_map<int, word_bucket_t> graph_t;
  100.  
  101. inline void connect(graph_t & G, vertex_t i, vertex_t j)
  102. {
  103.     G[i].insert(j);
  104.     G[j].insert(i);
  105. }
  106.  
  107. hash_t str_hash(const string & s, const size_t len)
  108. {
  109.     hash_t H = 0;
  110.     for(size_t i = 0; i < len; ++i)
  111.     {
  112.         hash_combine(H, s[i]);
  113.     }
  114.     return H;
  115. }
  116.  
  117.  
  118. hash_t str_hash(const string & s, size_t skip_pos, const size_t len)
  119. {
  120.     hash_t H = 0;
  121.     for(size_t i =0; i < len; ++i)
  122.     {
  123.         if(i != skip_pos)
  124.         {
  125.             hash_combine(H, s[i]);
  126.         }
  127.     }
  128.     return H;
  129. }
  130.  
// Word Ladder solver.
//
// The dictionary is pre-processed into buckets of words that share a
// partial hash (hash with one character position skipped).  Words in the
// same bucket agree on every character except (possibly) the skipped one
// — modulo hash collisions — so graph edges only need to be considered
// inside a bucket.  A plain BFS over the resulting graph then yields the
// shortest transformation length.
class Solution {
public:
    // word_vec[i] holds the i-th dictionary word, including the
    // sentinel-suffixed copies of 'start' and 'end' inserted by
    // ladderLength.
    vector<string> word_vec;


    // Fills `hashes` with the word_len partial hashes of `word`:
    // hashes[hash_no] is the hash of `word` with character #hash_no
    // skipped.  O(word_len^2): each of the word_len hashes rescans the
    // whole word.
    void compute_hashes(vector<hash_t>& hashes,
        const string & word, const size_t word_len)
    {
        // Size the output buffer.  NOTE(review): resize() value-initializes
        // only NEW elements; slots surviving from a previous call keep
        // stale values, which is harmless because the loop below
        // overwrites every slot.
        hashes.resize(word_len, 0);

        for(size_t hash_no = 0; hash_no < word_len; ++hash_no)
        {
            // skip character #hash_no when computing hashes[hash_no]
            hashes[hash_no] = str_hash(word, hash_no, word_len);
        }
    }

    // Connects every pair of words that share a bucket (i.e. have equal
    // partial hashes for some skipped position) into graph G, unless
    // their FULL hashes are equal.  The equal-full-hash test keeps the
    // sentinel copy of 'start'/'end' (see ladderLength) from being
    // linked to the identical dictionary word.
    // NOTE(review): hash equality is used as a proxy for string
    // equality; a hash collision could create a false edge or suppress
    // a real one.
    void connect_graph(graph_t & G, bucket_table_t & wordbuckets,
        const vector<hash_t> & full_hashes)
    {
        // Examine every bucket; only pairs inside one bucket can be
        // one-character neighbours.
        for(bucket_table_t::iterator it = wordbuckets.begin(); it != wordbuckets.end(); ++it)
        {
            word_bucket_t & bucket = it->second;
            word_bucket_t::iterator w1, w2, e = bucket.end();

            // All unordered pairs (w1, w2) within the bucket.
            for(w1 = bucket.begin(); w1 != e; ++w1)
            {
                for(w2 = w1; w2 != e; ++w2)
                {
                    if(w1 == w2) { continue; }

                    const size_t word1_pos = *w1, word2_pos = *w2;

                    // Skip pairs whose full hashes match: this covers a
                    // word paired with itself and, when 'start'/'end'
                    // already occur in dict, their sentinel copies
                    // (which share the full hash of the original word).
                    if(word1_pos == word2_pos || full_hashes[word1_pos] == full_hashes[word2_pos] )
                    {
                        continue;
                    }
                    connect(G, word1_pos, word2_pos);
                }
            }
        }
    }

    // Returns the number of words on the shortest transformation
    // sequence from `start` to `end` (endpoints included), or 0 when no
    // such sequence exists.
    // NOTE(review): mutates the caller's `dict` by inserting the
    // sentinel-suffixed copies of start and end.
    int ladderLength(string start, string end, dict_t &dict)
    {  
        // All words are assumed to have the same length as `start`.
        const size_t WORD_LEN = start.size();

        bool start_in_dict = (dict.find(start) != dict.end());
        //bool end_in_dict = (dict.find(end) != dict.end());

        // Append sentinel characters so that start and end become
        // distinct set keys even when they already occur in dict.  All
        // hashing below reads only the first WORD_LEN characters, so
        // the sentinels never influence any hash; they only make the
        // strings unique.  (A sentinel copy therefore shares its FULL
        // hash with the matching dict word, and connect_graph's
        // equal-full-hash check keeps those two from being linked.)
        start += '^';
        end += '$';

        dict.insert(start);
        dict.insert(end);

        const size_t NUM_WORDS = dict.size();

        word_vec.resize(NUM_WORDS);

        // "Not found" sentinel for the start/end word indices.
        // NOTE(review): INT_MAX in a size_t assumes NUM_WORDS < INT_MAX
        // and relies on <climits> being pulled in via stdafx.h — confirm.
        size_t start_pos = INT_MAX, end_pos = INT_MAX;

        vector<hash_t> full_hashes(NUM_WORDS);

        bucket_table_t wordbuckets;
        vector<hash_t> partial_hashes; // reused scratch buffer per word

        // Index every dictionary word into its partial-hash buckets.
        dict_t::iterator w = dict.begin();
        for(size_t word_no = 0; w != dict.end(); ++w, ++word_no)
        {
            const string & word = *w;

            // Full hash over the first WORD_LEN characters only.
            hash_t full_hash = str_hash(word, WORD_LEN);

            // compute partial hashes for all partial strings;
            // the i-th partial string is the original with char #i skipped
            compute_hashes(partial_hashes, word, WORD_LEN);

            // File this word under each (partial hash, skipped position)
            // key; one-character neighbours share such a key.
            for(size_t hash_no = 0; hash_no < WORD_LEN; ++hash_no)
            {
                const hash_t & H = partial_hashes[hash_no];
                wordbuckets[make_pair(H, hash_no)].insert(word_no);
            }


            // Remember where the (sentinel-suffixed) start/end landed.
            if(start_pos == INT_MAX && word == start) { start_pos = word_no; }
            if(end_pos == INT_MAX && word == end) { end_pos = word_no; }

            word_vec[word_no] = word;
            full_hashes[word_no] = full_hash;
        }

        graph_t G;

        connect_graph(G, wordbuckets, full_hashes);

        graph_t::iterator g;

        if(!start_in_dict)
        {
            g = G.find(start_pos);
            if(g != G.end())
            {  
                word_bucket_t & siblings = g->second;

                // Remove the direct start->end edge if present: by the
                // problem definition the path must pass through the
                // dictionary.  NOTE(review): only the start->end
                // direction is erased; the reverse edge stays, which is
                // sufficient because BFS expands from start.
                word_bucket_t::iterator s = siblings.find(end_pos);
                if(s != siblings.end()) { siblings.erase(s); }

                if(siblings.empty()) { return 0; }
            }
            else { return 0; } // no connections from 'start' to any node in the graph? exit
        }

        // no connections from 'end' to any node in the graph? exit
        g = G.find(end_pos);
        if( g == G.end() || g->second.empty()) { return 0; }


        vector<string> path; // NOTE(review): never filled; bfs ignores it
        return bfs(G, start_pos, end_pos, path, WORD_LEN);
    }

    // BFS queue entry: a word vertex plus its 1-based distance from the
    // start vertex (depth 1 == the start word itself).
    struct node
    {
        int vertex;
        int depth;
        node(int v, int d = 1)
            : vertex(v), depth(d)
        {}
    };

    // Breadth-first search from vertex_start to vertex_end; returns the
    // depth of vertex_end (number of words on the shortest path), or 0
    // when it is unreachable.  `path` and `word_len` are currently
    // unused.
    int bfs(graph_t & G, int vertex_start, int vertex_end, vector<string> & path, int word_len)
    {
        std::deque<node> Q;
        // NOTE(review): keyed by vertex_t (uint16_t as declared above)
        // while vertices are int — indices above 65535 would be
        // truncated; confirm intended dictionary sizes.
        std::unordered_set<vertex_t> visited;
        Q.push_back(node(vertex_start));

        while(!Q.empty())
        {
            node current = Q.front();                    
            Q.pop_front();

            // Target reached: its depth is the ladder length.
            if(current.vertex == vertex_end) {return current.depth;}

            // Nodes may be enqueued multiple times; visit only once.
            if(visited.find(current.vertex) != visited.end()) { continue; }

            visited.insert(current.vertex);

            graph_t::iterator connected = G.find(current.vertex);
            if(connected == G.end()) { continue; } // no connected words found, skip

            // NOTE(review): S is unused — candidate for removal.
            const string & S = word_vec[current.vertex];

            word_bucket_t & siblings = connected->second;
            word_bucket_t::iterator sibling = siblings.begin(), end = siblings.end();

            // Enqueue all neighbours one level deeper.
            for(; sibling != end; ++sibling)
            {
                Q.push_back(node(*sibling, current.depth + 1));
            }
        }
        return 0; // end vertex never reached
    }
};
  305.  
  306.  
  307. #define TEST 5
  308.  
  309. int _tmain(int argc, _TCHAR* argv[])
  310. {
  311.     dict_t words;
  312.     int ret;
  313.  
  314. #if TEST==1
  315.     words.insert("hot");
  316.     words.insert("dot");
  317.     words.insert("dog");
  318.     words.insert("lot");
  319.     words.insert("log");
  320.     ret = Solution().ladderLength("hit", "cog", words);
  321. #elif TEST==2
  322.     words.insert("hot");
  323.     ret = Solution().ladderLength("hot", "hot", words);
  324. #elif TEST==3
  325.     words.insert("dot");
  326.     words.insert("hot");
  327.     ret = Solution().ladderLength("hot", "hot", words);
  328.  
  329. #elif TEST==4
  330.     // ["talk","tons","fall","tail","gale","hall","negs"]
  331.     words.insert("talk");
  332.     words.insert("tons");
  333.     words.insert("fall");
  334.     words.insert("tail");
  335.     words.insert("gale");
  336.     words.insert("hall");
  337.     words.insert("negs");
  338.     ret = Solution().ladderLength("talk", "tail", words);
  339. #elif TEST==5
  340.     words.insert("a");
  341.     words.insert("b");
  342.     words.insert("c");
  343.     ret = Solution().ladderLength("a", "c", words);
  344. #endif
  345.  
  346.     std::cout << ret;
  347.  
  348.  
  349.     return 0;
  350. }
Advertisement
Add Comment
Please, Sign In to add comment