Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import json
- import ijson
- #%%
- #Step 1
- #Write items to separate file
- from ijson import items
# Step 1: stream every element of `_embedded.items` out of item.json and
# append each one to clean_items.json as its own JSON document.
# Both files are opened once; reopening the output for every item would
# repeat the same open/close work on each iteration.
with open('item.json', encoding='windows-1251') as file, \
        open('clean_items.json', mode='a+', encoding='windows-1251') as f:
    for item in items(file, '_embedded.items.item'):
        # ijson yields non-integer numbers as decimal.Decimal, which the
        # json module cannot serialize on its own; default=float converts
        # them instead of raising TypeError.
        json.dump(item, f, default=float)
- #%%
- #Step 2
- #Write separate items to DB
- from json import JSONDecoder
- from functools import partial
- # The following function reads items one at a time
def json_parse(fileobj, decoder=JSONDecoder(), buffersize=2048):
    """Lazily yield each JSON document from a stream of concatenated documents.

    The stream is read in pieces of ``buffersize`` characters, so only the
    not-yet-decoded tail of the input is held in memory.  Documents may be
    separated by any amount of whitespace (or by nothing at all), and the
    stream may begin with whitespace.

    Args:
        fileobj: Text-mode file-like object whose ``read(n)`` returns ``''``
            at end of input.
        decoder: Object providing ``raw_decode(str) -> (value, end_index)``.
            The default instance is created once and shared between calls.
        buffersize: Number of characters requested per ``read`` call.

    Yields:
        The decoded documents, in stream order.

    Note:
        Text that is still undecodable when the input ends (for example a
        truncated final document) is discarded without raising.  The function
        is meant for streams of objects/arrays: a bare top-level number that
        straddles a read boundary can be decoded as two separate values.
    """
    buffer = ''
    for chunk in iter(partial(fileobj.read, buffersize), ''):
        # raw_decode() does not skip leading whitespace, so strip it here as
        # well; otherwise a stream that starts with a blank or a newline
        # would never decode and would silently yield nothing.
        buffer = (buffer + chunk).lstrip()
        while buffer:
            try:
                result, index = decoder.raw_decode(buffer)
            except ValueError:
                # Not enough data to decode a full document yet: read more.
                break
            yield result
            # Drop the consumed document plus any separator whitespace.
            buffer = buffer[index:].lstrip()
- #%%
- # Using the json_parse function we can write the data to the DB
- # item by item, without consuming much memory
# Step 2: read the items back from clean_items.json one document at a time.
# The encoding is given explicitly and matches the one the file was written
# with in step 1, instead of relying on the platform default.
with open('clean_items.json', 'r', encoding='windows-1251') as infh:
    for data in json_parse(infh):
        # TODO: write data to DB
        # `pass` keeps the loop syntactically valid until the DB write exists.
        pass
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement