Advertisement
imcrazytwkr

Tumblr list checker (stage-2)

Dec 11th, 2018
187
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.61 KB | None | 0 0
  1. #!/usr/bin/env python3
  2. # encoding: utf-8
  3.  
  4. from urllib.request import urlopen
  5. from urllib.parse import urlparse, urlencode
  6. from http.client import HTTPException
  7. from sys import stdout, argv
  8. from os import getenv
  9.  
  10. import logging
  11. import json
  12.  
  13. API_KEY = getenv("TUMBLR_KEY", "8YUsKJvcJxo2MDwmWMDiXZGuMuIbeCwuQGP5ZHSEA4jBJPMnJT")
  14. PARAMS = urlencode({ "api_key": API_KEY })
  15.  
  16. def get_logger(name, level=logging.INFO):
  17.     log_format = logging.Formatter('[%(asctime)s] (%(name)s) %(levelname)s: %(message)s')
  18.  
  19.     std_output = logging.StreamHandler(stdout)
  20.     std_output.setFormatter(log_format)
  21.     std_output.setLevel(level)
  22.  
  23.     logger = logging.getLogger(name)
  24.     logger.setLevel(logging.DEBUG)
  25.     logger.addHandler(std_output)
  26.     return logger
  27.  
  28. # Source file parsing
  29. def parse_list(filename):
  30.     result = []
  31.  
  32.     with open(filename, 'r', encoding="UTF-8") as reader:
  33.         for line in reader:
  34.             clean_line = line.strip()
  35.             if clean_line:
  36.                 result.append(urlparse(clean_line).netloc or clean_line)
  37.  
  38.     return frozenset(result)
  39.  
  40. def blog_has_posts(blog_url):
  41.     log = get_logger(blog_url)
  42.  
  43.     request = "https://api.tumblr.com/v2/blog/{uri}/info?{params}".format(
  44.         params = PARAMS,
  45.         uri = blog_url
  46.     )
  47.  
  48.     try:
  49.         response = urlopen(request)
  50.     except (HTTPException, OSError) as err:
  51.         log.error("Error fetching blog info: {err}".format(err=err))
  52.         return None
  53.  
  54.     content_type = response.info().get_content_type()
  55.  
  56.     if content_type != "application/json":
  57.         log.error("Unexpected Content-Type: {type}".format(type=content_type))
  58.         return None
  59.  
  60.     try:
  61.         data = json.load(response)
  62.     except JSONDecodeError as err:
  63.         log.error("Error parsing blog info: {err}".format(err=err))
  64.         return None
  65.  
  66.     status = data.get("meta", {}).get("status", response.getcode())
  67.  
  68.     if status != 200:
  69.         log.error("Error processing {url}: {data}".format(url=url, data=data))
  70.         return None
  71.  
  72.     if data.get("response", {}).get("blog", {}).get("posts", 0) > 0:
  73.         return True
  74.  
  75.     return False
  76.  
  77.  
  78. if __name__ == "__main__":
  79.     # Parsing arguments
  80.     try:
  81.        source_file = argv[1]
  82.     except IndexError:
  83.        source_file = "tumblrs_raw.txt"
  84.  
  85.     try:
  86.         dest_file = argv[2]
  87.     except IndexError:
  88.         dest_file = "tumblrs_alive.txt"
  89.  
  90.     # Checking blogs
  91.     with open(dest_file, 'a', encoding="UTF-8") as out_file:
  92.         for blog_url in parse_list(source_file):
  93.             if blog_has_posts(blog_url):
  94.                 print(blog_url, file=out_file)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement