Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #! /usr/bin/env python
- # -*- coding: utf-8 -*-
- # vim:fenc=utf-8
- """
- """
- import logging
- from random import randint
- from time import time
- log = logging.getLogger(__name__)
- class Bound(list):
- @classmethod
- def from_strings(cls, strings):
- result = cls([len(s) for s in strings])
- result.strings = strings
- result.bound_cache = {}
- result.result_cache = {}
- result.char_set = set(strings[0])
- return result
- def copy_empty(self):
- result = self.__class__()
- result.strings = self.strings
- result.bound_cache = self.bound_cache
- result.char_set = self.char_set
- result.result_cache = self.result_cache
- return result
- def __hash__(self):
- return tuple(self).__hash__()
- def cut(self, other):
- for i in xrange(1, len(self)):
- if (((self[0] > other[0]) and (self[i] < other[i])) or
- ((self[0] < other[0]) and (self[i] > other[i]))):
- return True
- return False
- def top_bounds(self):
- strings = self.strings
- result = []
- visited = set()
- for i in xrange(self[0] - 1, -1, -1):
- if len(visited) == len(self.char_set): break
- c = strings[0][i]
- if not c in visited:
- visited.add(c)
- b = None
- if (i in self.bound_cache) and (not self.bound_cache[i].cut(self)):
- b = self.bound_cache[i]
- else:
- b = self.copy_empty()
- b.append(i)
- for j in xrange(1, len(strings)):
- match_pos = strings[j].rfind(c, 0, self[j])
- if match_pos == -1:
- break
- b.append(match_pos)
- if match_pos == -1:
- b = None
- if b is not None:
- if i not in self.bound_cache:
- self.bound_cache[i] = b
- contained = False
- for o in result:
- if not o.cut(b):
- contained = True
- break
- # if no bound has been found or is not contained by
- # another bound
- if len(result) == 0 or not contained:
- result.append(b)
- return result
- class Object(object):
- pass
- def longest_common_string(strings, bound=None, counter=None):
- if counter is None:
- counter = Object()
- counter.val = 0
- if bound is None:
- bound = Bound.from_strings(strings)
- if bound in bound.result_cache:
- return bound.result_cache[bound]
- result = 0
- top_bounds = bound.top_bounds()
- for b in top_bounds:
- counter.val += 1
- lcs = longest_common_string(strings, b, counter)
- if lcs + 1 > result:
- result = lcs + 1
- bound.result_cache[bound] = result
- return result
- def bf_longest_common_strings(strings):
- for s in strings:
- if len(s) == 0:
- return 0
- result = 0
- match = True
- for i, s in enumerate(strings[1:], 1):
- if s[-1] != strings[0][-1]:
- match = False
- if len(s) > 1:
- new = list(strings)
- new[i] = s[:-1]
- result = max(result, bf_longest_common_strings(new))
- if match:
- lcs = bf_longest_common_strings([s[:-1] for s in strings])
- result = max(result, lcs + 1)
- result = max(result, bf_longest_common_strings([strings[0][:-1]] + strings[1:]))
- return result
- def gen_strings(N, max_len=5):
- result = []
- for i in xrange(N):
- s = ''.join([chr(randint(97, 97 + 5)) for i in xrange(randint(0, max_len))])
- result.append(s)
- return result
- if __name__ == "__main__":
- test_simple = False
- test_correctness = False
- test_performance = True
- test_load = False
- if test_simple:
- strings = ["axbc", "bxcd", "xcd"]
- assert longest_common_string(strings) == 2
- if test_correctness:
- for i in xrange(1000):
- if i % 100 == 0:
- print i
- strings = gen_strings(3, max_len=6)
- assert(longest_common_string(strings) == bf_longest_common_strings(strings))
- if test_performance:
- ts = time()
- strings = gen_strings(20, max_len=200)
- print longest_common_string(strings)
- print "Time taken by single longest_common_strings: ", time() - ts
- if test_load:
- ts = time()
- for i in xrange(1000):
- if i % 10 == 0:
- print i
- strings = gen_strings(10, max_len=200)
- longest_common_string(strings)
- print "Time taken by 1000 longest_common_strings: ", time() - ts
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement