Advertisement
Guest User

Untitled

a guest
Apr 25th, 2016
71
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.28 KB | None | 0 0
  1. #!/usr/bin/env python
  2. import re
  3. import requests
  4. import codecs
  5. import six
  6.  
  7. MEMO_REGEX = re.compile(r'<textarea [^>]* name="memo" [^>]* placeholder="\s*([^"]*)\s*"[^>]*>')
  8.  
  9. def pick(s):
  10.     resp = s.get("http://m.todayhumor.co.kr/view.php?table=databox&no=18425")
  11.     if resp.status_code != 200:
  12.         raise IOError("HTTP %d" % (resp.status_code,))
  13.     resp.encoding = "utf-8"
  14.     text = resp.text
  15.     del resp
  16.     return (match.group(1).replace("\r", "&#13;").replace("\n", "&#10;") for match in MEMO_REGEX.finditer(text))
  17.  
  18. def load(fname):
  19.     with codecs.open(fname, "r", "utf-8") as f:
  20.         cmts = dict()
  21.         for line in f:
  22.             freq, txt = line.rstrip().split(" ", 1)
  23.             cmts[txt] = int(freq)
  24.         return cmts
  25.  
  26. def save(fname, cmts):
  27.     with codecs.open(fname, "w", "utf-8") as f:
  28.         for txt, freq in six.iteritems(cmts):
  29.             f.write("%d %s\n" % (freq, txt))
  30.  
  31. if __name__ == '__main__':
  32.     import cookielib
  33.     import time
  34.  
  35.     s = requests.Session()
  36.     s.headers.update({'User-Agent': 'Mozilla/5.0 (compatible; OU-Witty-Comment-Collector v0.1; written by stdout, mn=581777)'})
  37.     REF = "http://m.todayhumor.co.kr/"
  38.     with open(".cred", "r") as f:
  39.         username = f.readline().strip()
  40.         password = f.readline().strip()
  41.     resp = s.post("https://www.todayhumor.co.kr/member/m_login_end.php", data=dict(ref=REF, id=username, passwd=password), headers={'Referer': REF})
  42.     if resp.status_code != 200:
  43.         raise IOError("Login HTTP %d" % (resp.status_code,))
  44.     if "member_no" not in s.cookies:
  45.         raise IOError("Login failure")
  46.  
  47.     try:
  48.         cmts = load("witties.txt")
  49.     except (OSError, IOError):
  50.         cmts = dict()
  51.  
  52.     try:
  53.         i = 0
  54.         while True:
  55.             if i >= 256:
  56.                 save("witties.txt", cmts)
  57.                 i = 0
  58.             found = False
  59.             for txt in pick(s):
  60.                 found = True
  61.                 if txt not in cmts:
  62.                     print(txt)
  63.                     cmts[txt] = 1
  64.                 else:
  65.                     cmts[txt] += 1
  66.             if not found:
  67.                 raise IOError("Something went wrong!")
  68.             del txt
  69.             time.sleep(10)
  70.     except:
  71.         save("witties.txt", cmts)
  72.         raise
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement