Advertisement
Guest User

Untitled

a guest
May 2nd, 2021
61
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.69 KB | None | 0 0
  1. import sqlite3
  2. import json
  3. from datetime import datetime
  4.  
  5. timeframe = '2019-12'
  6. sql_transaction = []
  7. #creates data base and names it the value in timeframe variable
  8. connection = sqlite3.connect('{}.db'.format(timeframe))
  9. c = connection.cursor()
  10.  
  11. def create_table():
  12.     #creates a table if one does not exist and names it parent reply
  13.     c.execute("""CREATE TABLE IF NOT EXISTS parent_reply
  14. (parent_id TEXT PRIMARY KEY, comment_id TEXT UNIQUE, parent TEXT, comment TEXT,
  15. subreddit TEXT, unix INT, score INT)""")
  16.  
  17. def format_data(data):
  18.     data=data.replace("\n"," newlinechar ").replace("\r"," newlinechar ").replace('"',"'")
  19.     return data
  20.  
  21. def find_parent(pid):
  22.     try:
  23.         sql = "SLECT comment FROM parent_reply WHERE comment_id = '{}' LIMIT 1".format(pid)
  24.         c.execute(sql)
  25.         result = c.fetchone()
  26.         if result != None:
  27.             return result[0]
  28.         else: return False
  29.     except Exception as e:
  30.         print("find_parent", e)
  31.         return False
  32. if __name__=="__main__":
  33.     create_table()
  34.     row_counter = 0
  35.     paired_rows = 0
  36.  
  37.     with open("F:/Nebula0.0.5/chatdata/reddit_data/{}/RC_{}".format(timeframe.split('-')[0], timeframe, buffering=1000) as f:
  38.               for row in f:
  39.                   print(row)
  40.                   row_counter += 1
  41.                   row = json.loads(row)
  42.                   parent_id = row{'parent_id']
  43.                 #this function will clean up data
  44.                   body = format_data(row{'body'})
  45.                   created_utc = row['created_utc']
  46.                   score = row['score']
  47.                   subreddit = row['subreddit']
  48.                   parent_data = find_parent(parent_id)
  49.              
  50.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement