Advertisement
Guest User

Untitled

a guest
Apr 25th, 2017
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.62 KB | None | 0 0
  1. import re
  2. from collections import Counter
  3.  
  4. from unidecode import unidecode
  5. from toolz import curry, pipe
  6.  
  7. WHITESPACE = re.compile(r'\s+')
  8. NONALPHANUM = re.compile(r'[^\w_-]')
  9.  
  10.  
  11. @curry
  12. def trim(names, chars=None):
  13. return [i.strip(chars) for i in names]
  14.  
  15.  
  16. def lowercase(names):
  17. return [i.lower() for i in names]
  18.  
  19.  
  20. def deaccent(names):
  21. """Remove accents from characters."""
  22. return [unidecode(i) for i in names]
  23.  
  24.  
  25. @curry
  26. def sub(pattern, repl, names, count=0, flags=0):
  27. """Replace characters that match regular expression."""
  28. regex = re.compile(pattern, flags=flags)
  29. return [regex.sub(repl, i, count=count) for i in names]
  30.  
  31.  
  32. @curry
  33. def replace(old, new, names, count=-1):
  34. """Replace characters that match string."""
  35. return [i.replace(old, new, count) for i in names]
  36.  
  37.  
  38. @curry
  39. def whitespace(repl, names):
  40. """Replace whitespace."""
  41. return sub(WHITESPACE, repl, names)
  42.  
  43.  
  44. @curry
  45. def nonalphanums(repl, names):
  46. """Replace non-alphanumeric charaters."""
  47. return sub(NONALPHANUM, repl, names)
  48.  
  49.  
  50. @curry
  51. def numdups(names, fmt='{name}-{number}'):
  52. """Enumerate duplicate names."""
  53. duplicate = {k: v for k, v in Counter(names).items() if v > 1}
  54. for name, count in duplicate.items():
  55. for num in range(1, count + 1):
  56. idx = names.index(name)
  57. names[idx] = fmt.format(name=name, number=num)
  58. return names
  59.  
  60.  
  61. def as_dict(original_names, names):
  62. """Map slugs to original names {slug-name: orig-name, ...}"""
  63. return dict(zip(names, original_names))
  64.  
  65.  
  66. def slugify(names):
  67. return pipe(names, deaccent, trim, lowercase, whitespace('_'),
  68. nonalphanums(''), numdups)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement