Untitled

#! /usr/bin/env python
# -*- coding: utf-8 -*-
# vim:fenc=utf-8
"""

"""
import logging
from random import randint
from time import time
log = logging.getLogger(__name__)

class Bound(list):

    @classmethod
    def from_strings(cls, strings):
        result = cls([len(s) for s in strings])
        result.strings = strings
        result.bound_cache = {}
        result.result_cache = {}
        result.char_set = set(strings[0])
        return result

    def copy_empty(self):
        result = self.__class__()
        result.strings = self.strings
        result.bound_cache = self.bound_cache
        result.char_set = self.char_set
        result.result_cache = self.result_cache
        return result

    def __hash__(self):
        return tuple(self).__hash__()

    def cut(self, other):
        for i in xrange(1, len(self)):
            if (((self[0] > other[0]) and (self[i] < other[i])) or
                ((self[0] < other[0]) and (self[i] > other[i]))):
                return True
        return False

    def top_bounds(self):
        strings = self.strings
        result = []
        visited = set()
        for i in xrange(self[0] - 1, -1, -1):
            if len(visited) == len(self.char_set): break
            c = strings[0][i]
            if not c in visited:
                visited.add(c)
                b = None
                if (i in self.bound_cache) and (not self.bound_cache[i].cut(self)):
                    b = self.bound_cache[i]
                else:
                    b = self.copy_empty()
                    b.append(i)
                    for j in xrange(1, len(strings)):
                        match_pos = strings[j].rfind(c, 0, self[j])
                        if match_pos == -1:
                            break
                        b.append(match_pos)
                    if match_pos == -1:
                        b = None

                if b is not None:
                    if i not in self.bound_cache:
                        self.bound_cache[i] = b

                    contained = False
                    for o in result:
                        if not o.cut(b):
                            contained = True
                            break

                    # if no bound has been found or is not contained by
                    # another bound
                    if len(result) == 0 or not contained:
                        result.append(b)

        return result

class Object(object):
    pass

def longest_common_string(strings, bound=None, counter=None):
    if counter is None:
        counter = Object()
        counter.val = 0

    if bound is None:
        bound = Bound.from_strings(strings)

    if bound in bound.result_cache:
        return bound.result_cache[bound]

    result = 0
    top_bounds = bound.top_bounds()
    for b in top_bounds:
        counter.val += 1
        lcs = longest_common_string(strings, b, counter)
        if lcs + 1 > result:
            result = lcs + 1

    bound.result_cache[bound] = result

    return result

def bf_longest_common_strings(strings):
    for s in strings:
        if len(s) == 0:
            return 0

    result = 0
    match = True
    for i, s in enumerate(strings[1:], 1):
        if s[-1] != strings[0][-1]:
            match = False
            if len(s) > 1:
                new = list(strings)
                new[i] = s[:-1]
                result = max(result, bf_longest_common_strings(new))

    if match:
        lcs = bf_longest_common_strings([s[:-1] for s in strings])
        result = max(result, lcs + 1)

    result = max(result, bf_longest_common_strings([strings[0][:-1]] + strings[1:]))

    return result


def gen_strings(N, max_len=5):
    result = []
    for i in xrange(N):
        s = ''.join([chr(randint(97, 97 + 5)) for i in xrange(randint(0, max_len))])
        result.append(s)
    return result

if __name__ == "__main__":
    test_simple = False
    test_correctness = False
    test_performance = True
    test_load = False

    if test_simple:
        strings = ["axbc", "bxcd", "xcd"]
        assert longest_common_string(strings) == 2

    if test_correctness:
        for i in xrange(1000):
            if i % 100 == 0:
                print i
            strings = gen_strings(3, max_len=6)
            assert(longest_common_string(strings) == bf_longest_common_strings(strings))

    if test_performance:
        ts = time()
        strings = gen_strings(20, max_len=200)
        print longest_common_string(strings)
        print "Time taken by single longest_common_strings: ", time() - ts

    if test_load:
        ts = time()
        for i in xrange(1000):
            if i % 10 == 0:
                print i
            strings = gen_strings(10, max_len=200)
            longest_common_string(strings)
        print "Time taken by 1000 longest_common_strings: ", time() - ts