Advertisement
Guest User

Untitled

a guest
Jul 22nd, 2014
229
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.92 KB | None | 0 0
  1. #! /usr/bin/env python
  2. # -*- coding: utf-8 -*-
  3. # vim:fenc=utf-8
  4. """
  5.  
  6. """
  7. import logging
  8. from random import randint
  9. from time import time
  10. log = logging.getLogger(__name__)
  11.  
  12. class Bound(list):
  13.  
  14.     @classmethod
  15.     def from_strings(cls, strings):
  16.         result = cls([len(s) for s in strings])
  17.         result.strings = strings
  18.         result.bound_cache = {}
  19.         result.result_cache = {}
  20.         result.char_set = set(strings[0])
  21.         return result
  22.  
  23.     def copy_empty(self):
  24.         result = self.__class__()
  25.         result.strings = self.strings
  26.         result.bound_cache = self.bound_cache
  27.         result.char_set = self.char_set
  28.         result.result_cache = self.result_cache
  29.         return result
  30.  
  31.     def __hash__(self):
  32.         return tuple(self).__hash__()
  33.  
  34.     def cut(self, other):
  35.         for i in xrange(1, len(self)):
  36.             if (((self[0] > other[0]) and (self[i] < other[i])) or
  37.                 ((self[0] < other[0]) and (self[i] > other[i]))):
  38.                 return True
  39.         return False
  40.  
  41.     def top_bounds(self):
  42.         strings = self.strings
  43.         result = []
  44.         visited = set()
  45.         for i in xrange(self[0] - 1, -1, -1):
  46.             if len(visited) == len(self.char_set): break
  47.             c = strings[0][i]
  48.             if not c in visited:
  49.                 visited.add(c)
  50.                 b = None
  51.                 if (i in self.bound_cache) and (not self.bound_cache[i].cut(self)):
  52.                     b = self.bound_cache[i]
  53.                 else:
  54.                     b = self.copy_empty()
  55.                     b.append(i)
  56.                     for j in xrange(1, len(strings)):
  57.                         match_pos = strings[j].rfind(c, 0, self[j])
  58.                         if match_pos == -1:
  59.                             break
  60.                         b.append(match_pos)
  61.                     if match_pos == -1:
  62.                         b = None
  63.  
  64.                 if b is not None:
  65.                     if i not in self.bound_cache:
  66.                         self.bound_cache[i] = b
  67.  
  68.                     contained = False
  69.                     for o in result:
  70.                         if not o.cut(b):
  71.                             contained = True
  72.                             break
  73.  
  74.                     # if no bound has been found or is not contained by
  75.                     # another bound
  76.                     if len(result) == 0 or not contained:
  77.                         result.append(b)
  78.  
  79.         return result
  80.  
  81. class Object(object):
  82.     pass
  83.  
  84. def longest_common_string(strings, bound=None, counter=None):
  85.     if counter is None:
  86.         counter = Object()
  87.         counter.val = 0
  88.  
  89.     if bound is None:
  90.         bound = Bound.from_strings(strings)
  91.  
  92.     if bound in bound.result_cache:
  93.         return bound.result_cache[bound]
  94.  
  95.     result = 0
  96.     top_bounds = bound.top_bounds()
  97.     for b in top_bounds:
  98.         counter.val += 1
  99.         lcs = longest_common_string(strings, b, counter)
  100.         if lcs + 1 > result:
  101.             result = lcs + 1
  102.  
  103.     bound.result_cache[bound] = result
  104.  
  105.     return result
  106.  
  107. def bf_longest_common_strings(strings):
  108.     for s in strings:
  109.         if len(s) == 0:
  110.             return 0
  111.  
  112.     result = 0
  113.     match = True
  114.     for i, s in enumerate(strings[1:], 1):
  115.         if s[-1] != strings[0][-1]:
  116.             match = False
  117.             if len(s) > 1:
  118.                 new = list(strings)
  119.                 new[i] = s[:-1]
  120.                 result = max(result, bf_longest_common_strings(new))
  121.  
  122.     if match:
  123.         lcs = bf_longest_common_strings([s[:-1] for s in strings])
  124.         result = max(result, lcs + 1)
  125.  
  126.     result = max(result, bf_longest_common_strings([strings[0][:-1]] + strings[1:]))
  127.  
  128.     return result
  129.  
  130.  
  131. def gen_strings(N, max_len=5):
  132.     result = []
  133.     for i in xrange(N):
  134.         s = ''.join([chr(randint(97, 97 + 5)) for i in xrange(randint(0, max_len))])
  135.         result.append(s)
  136.     return result
  137.  
  138. if __name__ == "__main__":
  139.     test_simple = False
  140.     test_correctness = False
  141.     test_performance = True
  142.     test_load = False
  143.  
  144.     if test_simple:
  145.         strings = ["axbc", "bxcd", "xcd"]
  146.         assert longest_common_string(strings) == 2
  147.  
  148.     if test_correctness:
  149.         for i in xrange(1000):
  150.             if i % 100 == 0:
  151.                 print i
  152.             strings = gen_strings(3, max_len=6)
  153.             assert(longest_common_string(strings) == bf_longest_common_strings(strings))
  154.  
  155.     if test_performance:
  156.         ts = time()
  157.         strings = gen_strings(20, max_len=200)
  158.         print longest_common_string(strings)
  159.         print "Time taken by single longest_common_strings: ", time() - ts
  160.  
  161.     if test_load:
  162.         ts = time()
  163.         for i in xrange(1000):
  164.             if i % 10 == 0:
  165.                 print i
  166.             strings = gen_strings(10, max_len=200)
  167.             longest_common_string(strings)
  168.         print "Time taken by 1000 longest_common_strings: ", time() - ts
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement