Advertisement
imcrazytwkr

Twitter thread unnester

Jul 8th, 2023
587
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.13 KB | None | 0 0
  1. #!/usr/bin/env python3
  2.  
  3. import json
  4.  
  5.  
  6. def read_json(file_name):
  7.     with open(file_name) as input_file:
  8.         return json.load(input_file)
  9.  
  10.  
  11. def write_json(file_name, tweets):
  12.     with open(file_name, "w") as output_file:
  13.         json.dump(tweets, output_file, sort_keys=True, indent=2, ensure_ascii=False)
  14.  
  15.  
  16. def count_deep(replies):
  17.     total = len(replies)
  18.     if total == 0:
  19.         return total
  20.  
  21.     for reply in replies:
  22.         inner_replies = reply.get("replies", [])
  23.         if inner_replies:
  24.             total += count_deep(inner_replies)
  25.  
  26.     return total
  27.  
  28.  
  29. def is_flat(tweet):
  30.     for reply in tweet.get("replies", []):
  31.         if reply.get("replies", None):
  32.             return False
  33.  
  34.     return True
  35.  
  36.  
  37. def is_nested(tweet, recursive=False):
  38.     replies = tweet.get("replies", [])
  39.     if len(replies) != 1:
  40.         return False
  41.  
  42.     reply = replies[0]
  43.     while reply:
  44.         replies = reply.get("replies", None)
  45.         if not replies:
  46.             return True
  47.  
  48.         if len(replies) > 1:
  49.             return False
  50.  
  51.         reply = replies[0]
  52.  
  53.  
  54. def is_wtf(tweet, recursive=False):
  55.     replies = tweet.get("replies", [])
  56.     if not replies:
  57.         return recursive
  58.  
  59.     if len(replies) > 1:
  60.         return
  61.  
  62.  
  63. def unnest(tweet):
  64.     tweet_replies = tweet.pop("replies", None)
  65.     if not tweet_replies:
  66.         return tweet
  67.  
  68.     unnested = []
  69.     for reply in map(unnest, tweet_replies):
  70.         replies: list = reply.pop("replies", [])
  71.         replies.append(reply)
  72.  
  73.         unnested.extend(replies)
  74.  
  75.     if unnested:
  76.         unnested.sort(key=lambda r: r["id"])
  77.         tweet["replies"] = unnested
  78.  
  79.     return tweet
  80.  
  81.  
  82. if __name__ == "__main__":
  83.     tweets = read_json("./threads.json")
  84.  
  85.     multi_reply = []
  86.     wtf_reply = []
  87.     for tweet in tweets:
  88.         if is_flat(tweet):
  89.             multi_reply.append(tweet)
  90.             continue
  91.  
  92.         if is_nested(tweet):
  93.             multi_reply.append(unnest(tweet))
  94.             continue
  95.  
  96.         wtf_reply.append(tweet)
  97.  
  98.     write_json("./threads_unnested.json", multi_reply)
  99.     write_json("./threads_wtf.json", wtf_reply)
  100.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement