Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- import re
- import requests
- import codecs
- import six
# Captures the placeholder text of every <textarea ... name="memo" ...> tag.
# The capture group is lazy on purpose: with a greedy group the trailing \s*
# can never match anything, so whitespace before the closing quote would end
# up inside the captured text instead of being stripped.
MEMO_REGEX = re.compile(r'<textarea [^>]* name="memo" [^>]* placeholder="\s*([^"]*?)\s*"[^>]*>')


def pick(s, timeout=30):
    """Fetch the databox page once and return the memo placeholder texts.

    Args:
        s: A ``requests.Session``-like object. Only ``s.get(url, timeout=...)``
            is called on it.
        timeout: Seconds to wait for the server before the request is
            abandoned, so a stalled connection cannot block the caller
            forever.

    Returns:
        A generator yielding one string per ``MEMO_REGEX`` match in the page,
        with surrounding whitespace stripped and every CR / LF replaced by a
        single space.

    Raises:
        IOError: If the response status is anything other than 200.
    """
    resp = s.get(
        "http://m.todayhumor.co.kr/view.php?table=databox&no=18425",
        timeout=timeout,
    )
    if resp.status_code != 200:
        raise IOError("HTTP %d" % (resp.status_code,))
    # Decode the body as UTF-8 regardless of what the response advertises.
    resp.encoding = "utf-8"
    text = resp.text
    # Line breaks are flattened because the on-disk store keeps one record
    # per line.
    return (
        match.group(1).replace("\r", " ").replace("\n", " ")
        for match in MEMO_REGEX.finditer(text)
    )
def load(fname):
    """Read a comment tally previously written by ``save``.

    Each non-blank line of the UTF-8 file has the form ``"<count> <text>"``:
    an integer, one space, then the comment text (which may be empty).

    Args:
        fname: Path of the file to read.

    Returns:
        A dict mapping comment text to its integer count. If the same text
        appears on several lines, the last line wins.

    Raises:
        IOError / OSError: If the file cannot be opened.
        ValueError: If a line does not start with an integer count.
    """
    cmts = dict()
    with codecs.open(fname, "r", "utf-8") as f:
        for line in f:
            # Strip only the line terminator: a blanket rstrip() would also
            # eat the separator of an empty-text record ("3 ") and any
            # trailing whitespace that belongs to the text itself.
            line = line.rstrip("\r\n")
            if not line:
                continue
            # partition() never fails to unpack, even when the text part is
            # missing entirely.
            freq, _, txt = line.partition(" ")
            cmts[txt] = int(freq)
    return cmts
def save(fname, cmts):
    """Write the comment tally to ``fname`` as UTF-8 text.

    One line is written per entry of ``cmts`` in the form ``"<count> <text>"``.
    An existing file at ``fname`` is overwritten.

    Args:
        fname: Path of the file to write.
        cmts: Mapping of comment text to integer count.
    """
    with codecs.open(fname, "w", "utf-8") as out:
        emit = out.write
        for comment, count in cmts.items():
            emit("%d %s\n" % (count, comment))
if __name__ == '__main__':
    # Script entry point: log in, then poll the page forever, tallying how
    # often each memo placeholder text is seen and checkpointing the tally to
    # disk.
    import time

    REF = "http://m.todayhumor.co.kr/"
    STORE = "witties.txt"
    SAVE_EVERY = 256     # number of polls between periodic checkpoints
    POLL_SECONDS = 10    # pause between two polls
    HTTP_TIMEOUT = 30    # seconds before the login request is abandoned

    s = requests.Session()
    s.headers.update({'User-Agent': 'Mozilla/5.0 (compatible; OU-Witty-Comment-Collector v0.1; written by stdout, mn=581777)'})

    # .cred holds the username on its first line and the password on its
    # second line.
    with open(".cred", "r") as f:
        username = f.readline().strip()
        password = f.readline().strip()

    resp = s.post(
        "https://www.todayhumor.co.kr/member/m_login_end.php",
        data=dict(ref=REF, id=username, passwd=password),
        headers={'Referer': REF},
        timeout=HTTP_TIMEOUT,
    )
    if resp.status_code != 200:
        raise IOError("Login HTTP %d" % (resp.status_code,))
    # The session is treated as logged in only if this cookie was set.
    if "member_no" not in s.cookies:
        raise IOError("Login failure")

    # Resume from an earlier run when possible; otherwise start empty.
    try:
        cmts = load(STORE)
    except (OSError, IOError):
        cmts = dict()

    try:
        polls = 0
        while True:
            if polls >= SAVE_EVERY:
                save(STORE, cmts)
                polls = 0
            found = False
            for txt in pick(s):
                found = True
                if txt not in cmts:
                    # Echo only texts that have not been seen before.
                    print(txt)
                    cmts[txt] = 1
                else:
                    cmts[txt] += 1
            if not found:
                # A page without any memo textarea is treated as fatal.
                raise IOError("Something went wrong!")
            # Count the completed poll; without this the periodic
            # checkpoint above would never trigger.
            polls += 1
            time.sleep(POLL_SECONDS)
    finally:
        # The loop only ends through an exception (including Ctrl-C); persist
        # the tally before the exception propagates.
        save(STORE, cmts)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement