Guest User

Verify_Alpaca_format.py

a guest
Apr 20th, 2024
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.21 KB | Source Code | 0 0
  1. import ijson
  2. def verify_json_file(filename):
  3.     try:
  4.         with open(filename, 'r', encoding='utf-8') as file: # Parse the JSON objects from the file one by one
  5.             objects = ijson.items(file, 'item')
  6.             for obj in objects:
  7.                 pass  # No need to do anything with the object, we just want to check if it loads
  8.         print("All objects in the file were successfully loaded in UTF-8 as JSON format.")
  9.     except ijson.JSONError:
  10.         print("An error occurred while loading the JSON data. The file may not be properly formatted.")
  11.     except UnicodeDecodeError:
  12.         print("An error occurred while decoding the file. The file may not be in UTF-8 encoding.")
  13.  
  14. verify_json_file('Base_zh_Alpaca-CoT.json')
  15.  
  16.  
  17. import json
  18. def verify_json(file_path):
  19.     with open(file_path, 'r', encoding='utf-8-sig') as f:
  20.         data = json.load(f)
  21.  
  22.     for i, obj in enumerate(data):
  23.         assert 'instruction' in obj, f"Object at index {i} is missing 'instruction'"
  24.         assert 'input' in obj, f"Object at index {i} is missing 'input'"
  25.         assert 'output' in obj, f"Object at index {i} is missing 'output'"
  26.     print("The JSON file is valid.")
  27.  
  28. # verify_json('Base_zh_Alpaca-CoT.json')
Advertisement
Add Comment
Please, Sign In to add comment