Guest User

Untitled

a guest
Dec 19th, 2017
156
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.49 KB | None | 0 0
  1. import sqlite3
  2. import json
  3. from datetime import datetime
  4.  
  5. timeframe = '2007-02'
  6. sql_transaction = []
  7.  
  8. connection = sqlite3.connect('{}.db' .format(timeframe))
  9. c = connection.cursor()
  10.  
  11. def create_table():
  12.     c.execute("""CREATE TABLE IF NOT EXISTS parent_reply
  13. (parent_id TEXT PRIMARY KEY, comment_id TEXT UNIQUE, parent TEXT,
  14. comment TEXT, subreddit TEXT, unix INT, score INT)""")
  15.    
  16. def format_data(date):
  17.     data = data.replace("\n"," newlinechar ").replace("\r"," newlinechar ").replace('"',"'")
  18.     return data
  19.        
  20. def find_parent(pid):
  21.     try:
  22.         sql = "SELECT comment FROM parent_reply WHERE comment_id = '{}' LIMIT 1".format(pid)
  23.         c.execture(sql)
  24.         result = c.fetchone()
  25.         if result != None:
  26.             return result [0]
  27.         else: return False
  28.     except Exception as e:
  29.         #print ("find_parent", e)
  30.         return False
  31.        
  32.    
  33. if __name__ == "__main__":
  34.     create_table()
  35.     row_counter = 0
  36.     paired_rows = 0
  37.    
  38.     with open("/home/anonymouz/Desktop/redditdata/{}/RC_{}".format(timeframe.split('-')[0], timeframe ), buffering=1000) as f:
  39.         for row in f:
  40.             print(row)
  41.             row_counter += 1
  42.             row = json.loads(row)
  43.             parent_id = row['parent_id']
  44.             body = format_data(row['body'])
  45.             created_utc = row['created_utc']
  46.             score = row['score']
  47.             subreddit = row['subreddit']
  48.            
  49.             parent_data = find_parent(parent_id)
Add Comment
Please, Sign In to add comment