Word Ladder 2 (all paths)

#include "stdafx.h"
#include <iostream>
#include <utility>
#include <unordered_set>
#include <unordered_map>
#include <string>
#include <vector>
#include <deque>
#include <functional>
#include <stdint.h>

using namespace std;

typedef unordered_set<string> dict_t;

typedef size_t hash_t;
typedef pair<hash_t, int> partial_hash_t; // hash computed with ith character skipped
typedef unordered_set<int> word_bucket_t; // a set of word indices with the same partial hash

typedef vector<string> path_t;
typedef vector<path_t> allpaths_t;

template<typename T>
inline void hash_combine(hash_t & H, const T v)
{
    std::hash<T> hash_fn; // standard hash function
    H ^= hash_fn(v) + 0x9e3779b9 + (H<<6) + (H>>2);
}

// hash function for partial hash
class phash_fn {
public:
    size_t operator ()(const partial_hash_t &v) const
    {
        hash_t H = 0;
        hash_combine(H, v.first);
        hash_combine(H, v.second);
        return H;
    }
};

class phash_eq{
public:
    bool operator ()(const partial_hash_t &a, const partial_hash_t &b) const
    {
        return a.first == b.first && a.second == b.second;
    }
};

// lets put words with same partial hashes into same word bucket
typedef unordered_map<partial_hash_t, word_bucket_t, phash_fn, phash_eq> bucket_table_t;

typedef uint16_t vertex_t;

typedef unordered_map<int, word_bucket_t> graph_t;

hash_t str_hash(const string & s, const size_t len)
{
    hash_t H = 0;
    for(size_t i = 0; i < len; ++i)
    {
        hash_combine(H, s[i]);
    }
    return H;
}


hash_t str_hash(const string & s, size_t skip_pos, const size_t len)
{
    hash_t H = 0;
    for(size_t i =0; i < len; ++i)
    {
        if(i != skip_pos)
        {
            hash_combine(H, s[i]);
        }
    }
    return H;
}

class Solution {
public:
    vector<string> word_vec;
    vector<hash_t> full_hashes;

    void compute_hashes(vector<hash_t>& hashes,
        const string & word, const size_t word_len)
    {
        // reset hashes for the current word
        hashes.resize(word_len, 0);

        for(size_t hash_no = 0; hash_no < word_len; ++hash_no)
        {
            // skip character #hash_no when updating hash[hash_no]
            hashes[hash_no] = str_hash(word, hash_no, word_len);
        }
    }

    void connect_graph(graph_t & G, bucket_table_t & wordbuckets,
        const vector<hash_t> & full_hashes)
    {
        // check all the bucket within the same bucket
        for(bucket_table_t::iterator it = wordbuckets.begin(); it != wordbuckets.end(); ++it)
        {
            word_bucket_t & bucket = it->second;
            word_bucket_t::iterator w1, w2, e = bucket.end();

            for(w1 = bucket.begin(); w1 != e; ++w1)
            {
                for(w2 = w1; w2 != e; ++w2)
                {
                    if(w1 == w2) { continue; }

                    const size_t word1_pos = *w1, word2_pos = *w2;

                    // don't connect word with itself (when 'start' matches
                    if(word1_pos != word2_pos && full_hashes[word1_pos] != full_hashes[word2_pos] )
                    {
                        G[word1_pos].insert(word2_pos);
                        G[word2_pos].insert(word1_pos);
                    }
                }
            }
        }
    }

    allpaths_t ladderLength(string start, string end, dict_t &dict)
    {
        const size_t WORD_LEN = start.size();

        bool start_in_dict = (dict.find(start) != dict.end());
        bool end_in_dict = (dict.find(end) != dict.end());

        // modify start and end so that
        // their full hashes are different from words in dict
        start += '^';
        end += '$';

        dict.insert(start);
        dict.insert(end);

        const size_t NUM_WORDS = dict.size();

        word_vec.resize(NUM_WORDS);

        size_t start_pos = INT_MAX, end_pos = INT_MAX;

        full_hashes.resize(NUM_WORDS, 0);


        bucket_table_t wordbuckets;
        vector<hash_t> partial_hashes;

        // for all the words in the dictionary
        dict_t::iterator w = dict.begin();
        for(size_t word_no = 0; w != dict.end(); ++w, ++word_no)
        {
            const string & word = *w;

            hash_t full_hash = str_hash(word, WORD_LEN);

            // compute partial hashes for all partial strings
            // ith partial string is the original string with ith character skipped
            compute_hashes(partial_hashes, word, WORD_LEN);

            // put all words that are different by only one character
            // into wordbuckets of words
            for(size_t hash_no = 0; hash_no < WORD_LEN; ++hash_no)
            {
                const hash_t & H = partial_hashes[hash_no];
                wordbuckets[make_pair(H, hash_no)].insert(word_no);
            }


            if(start_pos == INT_MAX && word == start) { start_pos = word_no; }
            if(end_pos == INT_MAX && word == end) { end_pos = word_no; }

            word_vec[word_no] = word;
            full_hashes[word_no] = full_hash;
        }

        graph_t G;

        connect_graph(G, wordbuckets, full_hashes);

        graph_t::iterator g;

        allpaths_t all;

        if(!start_in_dict)
        {
            g = G.find(start_pos);
            if(g != G.end())
            {
                word_bucket_t & siblings = g->second;

                // remove connection from start_pos to end_pos if they are connected
                // by problem definition path must go throught the graph
                word_bucket_t::iterator s = siblings.find(end_pos);
                if(s != siblings.end()) { siblings.erase(s); }

                if(siblings.empty()) { return all; }
            }
            else { return all; } // no connections from 'start' to any node in the graph? exit
        }

        // no connections from 'end' to any node in the graph? exit
        g = G.find(end_pos);
        if( g == G.end() || g->second.empty()) { return all; }

        unordered_set<vertex_t> visited;

        path_t path;
        dfs(G, visited, start_pos, end_pos, WORD_LEN, all, path);
        return all;
    }


    void dfs(graph_t & G, unordered_set<vertex_t> & visited,
        int vertex, int vertex_end, int word_len,
        allpaths_t & all, path_t & path)
    {
        if(visited.find(vertex) != visited.end() ) { return; }

        visited.insert(vertex);

        const string & S = word_vec[vertex];

        path.push_back(word_vec[vertex].substr(0,word_len));

        if(vertex == vertex_end)
        {
            all.push_back(path);
            path.pop_back();
            return;
        }

        graph_t::iterator g = G.find(vertex);

        if(g != G.end())
        {
            word_bucket_t & siblings = g->second;
            word_bucket_t::iterator sibling = siblings.begin(), end = siblings.end();


            for(; sibling != end; ++sibling)
            {
                const int v = *sibling;
                dfs(G, visited, v, vertex_end, word_len, all, path);
            }
        }
        path.pop_back();
    }
};


#define TEST 1

int _tmain(int argc, _TCHAR* argv[])
{
    dict_t words;
    vector<vector<string> > ret;

#if TEST==1
    words.insert("hot");
    words.insert("dot");
    words.insert("dog");
    words.insert("lot");
    words.insert("log");
    ret = Solution().ladderLength("hit", "cog", words);
#elif TEST==2
    words.insert("hot");
    ret = Solution().ladderLength("hot", "hot", words);
#elif TEST==3
    words.insert("dot");
    words.insert("hot");
    ret = Solution().ladderLength("hot", "hot", words);

#elif TEST==4
    // ["talk","tons","fall","tail","gale","hall","negs"]
    words.insert("talk");
    words.insert("tons");
    words.insert("fall");
    words.insert("tail");
    words.insert("gale");
    words.insert("hall");
    words.insert("negs");
    ret = Solution().ladderLength("talk", "tail", words);
#elif TEST==5
    words.insert("a");
    words.insert("b");
    words.insert("c");
    ret = Solution().ladderLength("a", "c", words);
#endif

    return 0;
}