Advertisement
TyphonBaalHammon

Curiouscat backup

Sep 29th, 2024 (edited)
537
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.31 KB | Software | 0 0
import argparse
import json
import logging
import random
import sys
import time
from datetime import datetime
from pathlib import Path

import requests

  7. CURIOUSCAT_BASE_URL = "https://curiouscat.live/api/v2.1/profile"
  8.  
  9. ### Curiouscat profile downloader
  10. # Usage : [script] username
  11. # Will use the curiouscat API to download all of a specific profile's messages
  12. # The resulting json files are stored in a folder named "archive" in the execution directory
  13. #
  14.  
  15.  
  16. if __name__ == "__main__":
  17.     import argparse
  18.     import sys
  19.     from datetime import datetime
  20.     from pathlib import Path
  21.     import os
  22.     logging.basicConfig(level=logging.DEBUG)
  23.  
  24.     # Username command line argument
  25.     argparseur = argparse.ArgumentParser(usage="Save curiouscat profiles")
  26.     argparseur.add_argument("username",nargs=1)
  27.    
  28.     args = argparseur.parse_args()
  29.  
  30.     username = args.username[0]
  31.  
  32.     timestamp = datetime.now().timestamp()
  33.  
  34.     num_posts = 1
  35.  
  36.     page_num = 1
  37.  
  38.     # Looping on contents returned by the API
  39.     while num_posts > 0:
  40.         page_num += 1
  41.         CURIOUSCAT_URL_PARAMS = f"?username={username}&max_timestamp={timestamp}&_ob=noregisterOrSignin2"
  42.         URL = CURIOUSCAT_BASE_URL+CURIOUSCAT_URL_PARAMS
  43.  
  44.         #API query
  45.         resp = requests.get(url=URL)
  46.         body = resp.content
  47.         respdict = json.loads(body)
  48.        
  49.         #preparing the file path
  50.         date_string = datetime.fromtimestamp(timestamp).strftime("%Y%m%d-%H%M%S")
  51.         dir_name = f"archive/{username}"
  52.         dir_path = Path(dir_name)
  53.         dir_path.mkdir(parents=True, exist_ok=True)
  54.         file_path = Path(dir_path,f"{username}_archive_{date_string}.json")
  55.  
  56.         #saving to archive dir,
  57.         # in a subdirectory named after the username of the profile
  58.         with open(file_path,"w") as out_file:
  59.             json.dump(respdict,out_file,indent=4)
  60.  
  61.         # Control for the end of the crawling
  62.         # Curiouscat returns a limited number of messages per query
  63.         # The next "page" can be deduced from the timestamp of the last message returned in the
  64.         # response
  65.         num_posts = len(respdict["posts"])
  66.         if  num_posts > 0:
  67.             old_timestamp = timestamp
  68.             timestamp = respdict["posts"][-1]["post"]["timestamp"]
  69.             timestamp = int(timestamp)
  70.             if timestamp == old_timestamp:
  71.                 break
  72.             time.sleep(1+random.random())
  73.  
  74.  
  75.     sys.exit(0)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement