Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from collections import namedtuple, defaultdict, deque, Counter
- import functools
- """ Task 1 """
def factor(seq):
    """Encode *seq* as a factor: integer codes assigned in order of first appearance.

    Returns a ``Factor`` namedtuple with:
      elements -- list of integer codes, one per element of seq
      levels   -- dict mapping each distinct element to its code
    """
    Factor = namedtuple("Factor", ["elements", "levels"])
    levels = {}
    for item in seq:
        if item not in levels:
            # Next unused code == number of distinct items seen so far.
            levels[item] = len(levels)
    codes = [levels[item] for item in seq]
    return Factor(codes, levels)
def group_by(seq, key):
    """Group the elements of *seq* into lists keyed by ``key(element)``.

    Returns a ``defaultdict(list)``; element order within each group
    follows the order of *seq*.
    """
    groups = defaultdict(list)
    for item in seq:
        groups[key(item)].append(item)
    return groups
def invert(d):
    """Invert mapping *d*: each value maps to the set of keys that held it.

    Returns a ``defaultdict(set)``, so duplicate values are collected
    rather than overwritten.
    """
    inverted = defaultdict(set)
    for key, value in d.items():
        inverted[value].add(key)
    return inverted
- """ Task 2 """
# Result type of cache_info(); built once at module level instead of on
# every cache_info() call.
_CacheInfo = namedtuple("CacheInfo", ["hits", "misses", "maxsize", "currsize"])


def lru_cache(f=None, *, max_size=64):
    """LRU-caching decorator, usable bare (``@lru_cache``) or
    parameterized (``@lru_cache(max_size=N)``).

    The wrapped function gains ``cache_info()`` (hits/misses/maxsize/currsize
    namedtuple) and ``cache_clear()`` attributes, mirroring
    ``functools.lru_cache``. ``max_size <= 0`` disables caching entirely
    (the original crashed with IndexError on an empty deque in that case).
    Arguments must be hashable.
    """
    if f is None:
        # Called as @lru_cache(max_size=...): return the real decorator.
        return functools.partial(lru_cache, max_size=max_size)

    hits = 0
    misses = 0
    order = deque()  # cache keys, least- to most-recently used
    cache = {}

    def cache_info():
        """Return a snapshot of the cache statistics."""
        return _CacheInfo(hits, misses, max_size, len(cache))

    def cache_clear():
        """Reset statistics and drop every cached entry."""
        nonlocal hits, misses
        hits = misses = 0
        order.clear()
        cache.clear()

    @functools.wraps(f)
    def inner(*args, **kwargs):
        nonlocal hits, misses
        # frozenset makes the kwargs part order-insensitive and hashable.
        key = (args, frozenset(kwargs.items()))
        if key in cache:
            hits += 1
            # Promote key to most-recently-used.
            order.remove(key)
            order.append(key)
            return cache[key]
        misses += 1
        res = f(*args, **kwargs)
        if max_size > 0:
            if len(cache) >= max_size:
                # Evict the least-recently-used entry to make room.
                del cache[order.popleft()]
            cache[key] = res
            order.append(key)
        return res

    inner.cache_info = cache_info
    inner.cache_clear = cache_clear
    return inner
- """ Task 3 """
def build_graph(words, mismatch_percent):
    """Build an adjacency-list graph over word indices.

    Two words are adjacent when they have equal length and differ in at
    most ``mismatch_percent`` percent of their character positions.

    Returns a dict mapping index -> list of neighbour indices.
    """
    graph = {}
    for i, word in enumerate(words):
        # Max allowed number of differing positions for this word's length;
        # invariant for the whole inner loop, so hoisted out of it.
        threshold = mismatch_percent * len(word) / 100
        neighbours = []
        for j, other in enumerate(words):
            if j == i or len(other) != len(word):
                continue
            mismatches = sum(a != b for a, b in zip(word, other))
            if mismatches <= threshold:
                neighbours.append(j)
        graph[i] = neighbours
    return graph
def export_graph(g, labels):
    """Serialize graph *g* (index -> neighbour list) to Graphviz DOT text.

    Emits one ``i [label="..."]`` line per vertex and each undirected edge
    exactly once. Fix: the original de-duplicated edges by substring-scanning
    the whole output, so e.g. ``'1 -- 2\n'`` matched inside ``'11 -- 2\n'``
    (or inside a label containing a newline) and real edges were dropped;
    it was also O(E^2). A set of frozenset edge keys is both correct and O(E).
    """
    lines = ["graph {"]
    seen = set()  # frozenset({i, v}) per undirected edge already emitted
    for i in range(len(labels)):
        lines.append(f'{i} [label="{labels[i]}"]')
        for v in g[i]:
            edge = frozenset((i, v))
            if edge not in seen:
                seen.add(edge)
                lines.append(f'{i} -- {v}')
    return "\n".join(lines) + "\n}"
def find_connected_components(g):
    """Return the connected components of graph *g* as a list of vertex sets.

    *g* maps vertex -> iterable of neighbour vertices. Fix: the original
    single-pass "grow the first matching set" approach never merged sets, so
    a vertex could end up in two components (e.g. edges 0-3, 1-2, 2-3), and
    an isolated first vertex was emitted twice. Replaced with a standard
    seen-set DFS: each unvisited vertex seeds one full component traversal.
    """
    seen = set()
    components = []
    for start in g:
        if start in seen:
            continue
        component = set()
        stack = [start]
        while stack:
            v = stack.pop()
            if v in component:
                continue
            component.add(v)
            stack.extend(n for n in g[v] if n not in component)
        seen |= component
        components.append(component)
    return components
def find_consensus(l):
    """Column-wise majority vote over the equal-length strings in *l*.

    For each position, the most frequent character wins; ties go to the
    character that appears first in the column (Counter insertion order).
    Assumes every string is at least as long as l[0]; raises IndexError
    on an empty *l*, matching the original.
    """
    width = len(l[0])
    winners = [
        Counter(word[pos] for word in l).most_common(1)[0][0]
        for pos in range(width)
    ]
    return "".join(winners)
def correct_typos(words, mismatch_percent):
    """Replace each word with the consensus spelling of its connected component.

    Words within mismatch_percent% of each other (same length) are grouped
    via build_graph/find_connected_components; every member of a component
    is mapped to that component's find_consensus string.

    NOTE(review): the result is grouped by component — one consensus copy per
    member — not aligned with the input order of *words*; confirm callers
    expect that ordering.
    """
    components = find_connected_components(build_graph(words, mismatch_percent))
    corrected = []
    for component in components:
        consensus = find_consensus([words[i] for i in component])
        # extend() with a repeated list == the original "+=" idiom.
        corrected.extend([consensus] * len(component))
    return corrected
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement