Guest User

WK Review Data Merger

a guest
Dec 8th, 2024
100
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.03 KB | Source Code | 0 0
  1. import pandas as pd
  2. import json
  3. from datetime import datetime
  4.  
  5. def convert_to_unix_timestamp(date_str):
  6.     # Update the format to include seconds if necessary
  7.     dt = datetime.strptime(date_str, "%d/%m/%Y %H:%M:%S")
  8.     return int(dt.timestamp() * 1000)
  9.  
  10. def csv_to_json(csv_file, past_data_file, output_file):
  11.     # Read the CSV file
  12.     df = pd.read_csv(csv_file)
  13.    
  14.     # Create a list to store the resulting data
  15.     result = []
  16.    
  17.     for index, row in df.iterrows():
  18.         # Convert column F to Unix timestamp
  19.         f_value = convert_to_unix_timestamp(row["created_at"])
  20.        
  21.         # Create the desired format [F, A, D, B, C]
  22.         formatted_row = [f_value, row["subject_id"], row["starting_srs_stage"], row["incorrect_meaning_answers"], row["incorrect_reading_answers"]]
  23.         result.append(formatted_row)
  24.    
  25.     # Load existing data from past_data.json
  26.     with open(past_data_file, 'r') as file:
  27.         past_data = json.load(file)
  28.    
  29.     # Merge new data with past data
  30.     combined_data = past_data + result
  31.    
  32.     # Remove duplicates based on (timestamp, A value)
  33.     unique_data = {}
  34.     duplicates_count = 0
  35.    
  36.     for item in combined_data:
  37.         key = (item[0], item[1])  # (timestamp, A value)
  38.         if key not in unique_data:
  39.             unique_data[key] = item
  40.         else:
  41.             duplicates_count += 1
  42.  
  43.     # Convert unique data to a list
  44.     final_data = list(unique_data.values())
  45.    
  46.     # Convert the result to JSON
  47.     json_output = json.dumps(final_data, indent=None)
  48.    
  49.     with open(output_file, 'w') as output:
  50.         output.write(json_output)
  51.    
  52.     return duplicates_count
  53.  
  54. if __name__ == "__main__":
  55.     csv_file = 'reviews.csv'  # Replace with your CSV file path
  56.     past_data_file = 'past_data.json'  # Replace with your past data JSON file path
  57.     output_file = 'output.txt'  # Output file path for the result
  58.     duplicates_removed = csv_to_json(csv_file, past_data_file, output_file)
  59.  
  60.     print(f"Duplicates removed: {duplicates_removed}")
Advertisement
Add Comment
Please, Sign In to add comment