Advertisement
Guest User

Untitled

a guest
Jun 16th, 2014
244
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.64 KB | None | 0 0
  1. import re
  2.  
  3. USER_RE = ur'[0-9A-Za-z_-]+'
  4. _URL_RE = re.compile(ur"""\b((?:([\w-]+):(/{1,3})|www[.])(?:(?:(?:[^\s&()]|&amp;|&quot;)*(?:[^!"#$%&'()*+,.:;<=>?@\[\]^`{|}~\s]))|(?:\((?:[^\s&()]|&amp;|&quot;)*\)))+)""")
  5. _USER_RE = re.compile(ur"""(?:(?<=[\s\W])|^)@(%s)""" % USER_RE)
  6. _MSG_RE = re.compile(ur"""(?:(?<=[\s\W])|^)#([0-9A-Za-z]+(?:/[0-9A-Za-z]+)?)""")
  7.  
  8. shittypes = (
  9.     ('url', _URL_RE, lambda m: (m.group(1), clip_long_url(m))),
  10.     ('msg', _MSG_RE, lambda m: (m.group(1),)),
  11.     ('user', _USER_RE, lambda m: (m.group(1),)),
  12. )
  13.  
  14. class LinkParser(object):
  15.     def __init__(self, types=shittypes):
  16.         self.types = types
  17.  
  18.     def parse(self, text):
  19.         # Who the fuck write this piece of shit?
  20.         # TODO: Refactor this shit.
  21.         pos = 0
  22.         texlen = len(text)
  23.         while pos < texlen:
  24.             mins = texlen
  25.             minm = None
  26.             for typ, reg, handler in self.types:
  27.                 m = reg.search(text[pos:])
  28.                 if m is None:
  29.                     continue
  30.                 s = m.start()
  31.                 if s < mins:
  32.                     mins = s
  33.                     minm = (typ, m, handler)
  34.             if not minm:
  35.                 yield text[pos:]
  36.                 return
  37.             else:
  38.                 # TODO: Fix first empty value.
  39.                 yield text[pos:pos + mins]
  40.                 yield ((minm[0], minm[1].group(0)) + minm[2](minm[1]))
  41.                 pos = pos + minm[1].end()
  42.  
  43. _shitparser = LinkParser()
  44.  
  45.  
  46. def linkparse(text):
  47.     return _shitparser.parse(text)
  48.  
  49.  
  50. for m in linkparse("asd @qwerty asd"):
  51.     print('<a href="/u/%s">%s</a>' % (m[2], m[1]))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement