Advertisement
Madmouse

python url detector functions

Jul 20th, 2015
324
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.19 KB | None | 0 0
  1. # ------------------------------------------------------------------------------
  2. # THE BEER-WARE LICENSE (Revision 43):
  3. # <aaronryool@gmail.com> wrote this file. As long as you retain this notice you
  4. # can do whatever you want with this stuff. If we meet some day, and you think
  5. # this stuff is worth it, you can buy me a beer in return
  6. # ------------------------------------------------------------------------------
  7.  
  8. def is_url(url):
  9.     regex = re.compile(
  10.         r'^https?://'  # http:// or https://
  11.         r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+[A-Z]{2,6}\.?|'  # domain...
  12.         r'localhost|'  # localhost...
  13.         r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip
  14.         r'(?::\d+)?'  # optional port
  15.         r'(?:/?|[/?]\S+)$', re.IGNORECASE)
  16.     return url is not None and regex.search(url)
  17.  
  18. def is_slink(url, regex):
  19.     regex = re.compile(regex, re.IGNORECASE)
  20.     return is_url(url) and regex.search(url)
  21.  
  22.  
  23. def scraperfun(keyword, person, command, option):
  24.     print(keyword,person,command,option)
  25.     if command == "add":
  26.         if not is_slink(option, scrapers[keyword]):
  27.             return
  28.         with open(os.path.expanduser("~/tools/bot/lists/"+keyword[1:]), "a+") as f:
  29.             if not any(option == l.rstrip("\r\n") for l in f):
  30.                 f.write(option+'\n')
  31.     elif command == "random":
  32.         random.seed(time.time())
  33.         with open(os.path.expanduser("~/tools/bot/lists/"+keyword[1:]), "r") as f:
  34.             sendmsg(person, random_line(f))
  35.         return
  36.     elif command == "list":
  37.         f = open(os.path.expanduser("~/tools/bot/lists/"+keyword[1:]), "r")
  38.         t = threading.Thread(target=sendlist, args=(person, f))
  39.         t.daemon = True
  40.         t.start()
  41.  
  42. scrapers = {
  43.     "@youtube": r".*youtube\.com/watch\?v\=[A-Z0-9_\-]{11}(&*.)?$|youtu.be/[A-Z0-9_\-]{11}$",
  44.     "@pastebin": r".*pastebin\.com/[A-Z0-9]{8}(&*.)?",
  45.     "@reddit": r".*reddit\.com/[A-Z]{1}/[A-Z0-9_\-]+/comments/[A-Z0-9]{6}/[A-Z0-9_]+/(\?ref=share&ref_source=link)?",
  46.     "@slashdot": r".*slashdot\.org/story/[0-9]{2}/[0-9]{2}/[0-9]{2}/[0-9]{1,10}/[A-Z0-9_]+",
  47.     "@hackthissite": r".*hackthissite\.org/forums/viewtopic.php\?f=[0-9]{1,10}&t=[0-9]{1,10}$"
  48. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement